/*
 * TagPartitionedLogSystem.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "flow/ActorCollection.h"
#include "fdbserver/LogSystem.h"
#include "fdbserver/ServerDBInfo.h"
#include "fdbserver/DBCoreState.h"
#include "fdbserver/WaitFailure.h"
#include "fdbclient/SystemData.h"
#include "fdbrpc/simulator.h"
#include "fdbrpc/Replication.h"
#include "fdbrpc/ReplicationUtils.h"
#include "fdbserver/Knobs.h"
#include "fdbserver/RecoveryState.h"
#include "fdbserver/LogProtocolMessage.h"
#include "flow/actorcompiler.h" // This must be the last #include.

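// Waits for `f` (e.g. the combined per-set quorum futures built in push() below) and then returns the
// smallest commit version among the replies that have already completed successfully; failed or
// still-outstanding replies are skipped.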
ACTOR Future<Version> minVersionWhenReady(Future<Void> f, std::vector<Future<TLogCommitReply>> replies) {
    wait(f);
    Version minVersion = std::numeric_limits<Version>::max();
    for (auto& reply : replies) {
        if (reply.isReady() && !reply.isError()) {
            minVersion = std::min(minVersion, reply.get().version);
        }
    }
    return minVersion;
}

// TagPartitionedLogSystem info in old epoch
struct OldLogData {
    std::vector<Reference<LogSet>> tLogs;
    int32_t logRouterTags;
    int32_t txsTags; // The number of txsTags, which may change across generations.
    Version epochBegin, epochEnd;
    std::set<int8_t> pseudoLocalities;
    LogEpoch epoch;

    OldLogData() : epochBegin(0), epochEnd(0), logRouterTags(0), txsTags(0), epoch(0) {}

    // Constructor for T of OldTLogConf and OldTLogCoreData
    template <class T>
    explicit OldLogData(const T& conf)
      : logRouterTags(conf.logRouterTags), txsTags(conf.txsTags), epochBegin(conf.epochBegin),
        epochEnd(conf.epochEnd), pseudoLocalities(conf.pseudoLocalities), epoch(conf.epoch) {
        tLogs.resize(conf.tLogs.size());
        for (int j = 0; j < conf.tLogs.size(); j++) {
            auto logSet = makeReference<LogSet>(conf.tLogs[j]);
            tLogs[j] = logSet;
        }
    }
};

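// Bookkeeping for locking one LogSet during recovery: the set being locked, whether it belongs to the
// current (most recent) generation, the epoch-end version associated with the lock, and the outstanding
// TLogLockResult replies from its TLogs.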
struct LogLockInfo {
    Version epochEnd;
    bool isCurrent;
    Reference<LogSet> logSet;
    std::vector<Future<TLogLockResult>> replies;

    LogLockInfo() : epochEnd(std::numeric_limits<Version>::max()), isCurrent(false) {}
};

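// The constructors below convert between the in-memory representation of a log generation (LogSet,
// OldLogData) and its serialized counterparts: TLogSet/OldTLogConf, which travel in LogSystemConfig, and
// CoreTLogSet/OldTLogCoreData, which are persisted in DBCoreState.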
LogSet::LogSet(const TLogSet& tLogSet)
  : tLogWriteAntiQuorum(tLogSet.tLogWriteAntiQuorum), tLogReplicationFactor(tLogSet.tLogReplicationFactor),
    tLogLocalities(tLogSet.tLogLocalities), tLogVersion(tLogSet.tLogVersion), tLogPolicy(tLogSet.tLogPolicy),
    isLocal(tLogSet.isLocal), locality(tLogSet.locality), startVersion(tLogSet.startVersion),
    satelliteTagLocations(tLogSet.satelliteTagLocations) {
    for (const auto& log : tLogSet.tLogs) {
        logServers.push_back(makeReference<AsyncVar<OptionalInterface<TLogInterface>>>(log));
    }
    for (const auto& log : tLogSet.logRouters) {
        logRouters.push_back(makeReference<AsyncVar<OptionalInterface<TLogInterface>>>(log));
    }
    for (const auto& log : tLogSet.backupWorkers) {
        backupWorkers.push_back(makeReference<AsyncVar<OptionalInterface<BackupInterface>>>(log));
    }
    filterLocalityDataForPolicy(tLogPolicy, &tLogLocalities);
    updateLocalitySet(tLogLocalities);
}

LogSet::LogSet(const CoreTLogSet& coreSet)
  : tLogWriteAntiQuorum(coreSet.tLogWriteAntiQuorum), tLogReplicationFactor(coreSet.tLogReplicationFactor),
    tLogLocalities(coreSet.tLogLocalities), tLogVersion(coreSet.tLogVersion), tLogPolicy(coreSet.tLogPolicy),
    isLocal(coreSet.isLocal), locality(coreSet.locality), startVersion(coreSet.startVersion),
    satelliteTagLocations(coreSet.satelliteTagLocations) {
    for (const auto& log : coreSet.tLogs) {
        logServers.push_back(
            makeReference<AsyncVar<OptionalInterface<TLogInterface>>>(OptionalInterface<TLogInterface>(log)));
    }
    // Do NOT recover coreSet.backupWorkers, because master will recruit new ones.
    filterLocalityDataForPolicy(tLogPolicy, &tLogLocalities);
    updateLocalitySet(tLogLocalities);
}

TLogSet::TLogSet(const LogSet& rhs)
  : tLogWriteAntiQuorum(rhs.tLogWriteAntiQuorum), tLogReplicationFactor(rhs.tLogReplicationFactor),
    tLogLocalities(rhs.tLogLocalities), tLogVersion(rhs.tLogVersion), tLogPolicy(rhs.tLogPolicy), isLocal(rhs.isLocal),
    locality(rhs.locality), startVersion(rhs.startVersion), satelliteTagLocations(rhs.satelliteTagLocations) {
    for (const auto& tlog : rhs.logServers) {
        tLogs.push_back(tlog->get());
    }
    for (const auto& logRouter : rhs.logRouters) {
        logRouters.push_back(logRouter->get());
    }
    for (const auto& worker : rhs.backupWorkers) {
        backupWorkers.push_back(worker->get());
    }
}

OldTLogConf::OldTLogConf(const OldLogData& oldLogData)
  : logRouterTags(oldLogData.logRouterTags), txsTags(oldLogData.txsTags), epochBegin(oldLogData.epochBegin),
    epochEnd(oldLogData.epochEnd), pseudoLocalities(oldLogData.pseudoLocalities), epoch(oldLogData.epoch) {
    for (const Reference<LogSet>& logSet : oldLogData.tLogs) {
        tLogs.emplace_back(*logSet);
    }
}

CoreTLogSet::CoreTLogSet(const LogSet& logset)
  : tLogWriteAntiQuorum(logset.tLogWriteAntiQuorum), tLogReplicationFactor(logset.tLogReplicationFactor),
    tLogLocalities(logset.tLogLocalities), tLogPolicy(logset.tLogPolicy), isLocal(logset.isLocal),
    locality(logset.locality), startVersion(logset.startVersion), satelliteTagLocations(logset.satelliteTagLocations),
    tLogVersion(logset.tLogVersion) {
    for (const auto& log : logset.logServers) {
        tLogs.push_back(log->get().id());
    }
    // Do NOT store logset.backupWorkers, because master will recruit new ones.
}

OldTLogCoreData::OldTLogCoreData(const OldLogData& oldData)
  : logRouterTags(oldData.logRouterTags), txsTags(oldData.txsTags), epochBegin(oldData.epochBegin),
    epochEnd(oldData.epochEnd), pseudoLocalities(oldData.pseudoLocalities), epoch(oldData.epoch) {
    for (const Reference<LogSet>& logSet : oldData.tLogs) {
        if (logSet->logServers.size()) {
            tLogs.emplace_back(*logSet);
        }
    }
}

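// The tag-partitioned log system: commits are pushed to the current generation of TLog sets (tLogs),
// while the configurations of older generations are kept in oldLogData until recovery (and, where
// applicable, the backup workers) no longer need them.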
struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogSystem> {
    const UID dbgid;
    LogSystemType logSystemType;
    std::vector<Reference<LogSet>> tLogs; // LogSets in different locations: primary, satellite, or remote
    int expectedLogSets;
    int logRouterTags;
    int txsTags;
    UID recruitmentID;
    int repopulateRegionAntiQuorum;
    bool stopped;
    std::set<int8_t> pseudoLocalities; // Represent special localities that will be mapped to tagLocalityLogRouter
    const LogEpoch epoch;
    LogEpoch oldestBackupEpoch;

    // new members
    std::map<Tag, Version> pseudoLocalityPopVersion;
    Future<Void> rejoins;
    Future<Void> recoveryComplete;
    Future<Void> remoteRecovery;
    Future<Void> remoteRecoveryComplete;
    std::vector<LogLockInfo> lockResults;
    AsyncVar<bool> recoveryCompleteWrittenToCoreState;
    bool remoteLogsWrittenToCoreState;
    bool hasRemoteServers;
    AsyncTrigger backupWorkerChanged;
    std::set<UID> removedBackupWorkers; // Workers that are removed before setting them.

    Optional<Version> recoverAt;
    Optional<Version> recoveredAt;
    Version knownCommittedVersion;
    Version backupStartVersion = invalidVersion; // max(tLogs[0].startVersion, previous epochEnd).
    LocalityData locality;
    // For each currently running popFromLog actor, outstandingPops is
    // (logID, tag)->(max popped version, durableKnownCommittedVersion).
    // Why do we need durableKnownCommittedVersion? knownCommittedVersion gives the lower bound of what data
    // will need to be copied into the next generation to restore the replication factor.
    // Guess: It probably serves as a minimum version of what data should be on a TLog in the next generation and
    // sending a pop for anything less than durableKnownCommittedVersion for the TLog would be pointless.
    std::map<std::pair<UID, Tag>, std::pair<Version, Version>> outstandingPops;

    Optional<PromiseStream<Future<Void>>> addActor;
    ActorCollection popActors;
    std::vector<OldLogData> oldLogData; // each element has the log info of one old epoch.
    AsyncTrigger logSystemConfigChanged;

    TagPartitionedLogSystem(UID dbgid,
                            LocalityData locality,
                            LogEpoch e,
                            Optional<PromiseStream<Future<Void>>> addActor = Optional<PromiseStream<Future<Void>>>())
      : dbgid(dbgid), logSystemType(LogSystemType::empty), expectedLogSets(0), logRouterTags(0), txsTags(0),
        repopulateRegionAntiQuorum(0), epoch(e), oldestBackupEpoch(0), recoveryCompleteWrittenToCoreState(false),
        locality(locality), remoteLogsWrittenToCoreState(false), hasRemoteServers(false), stopped(false),
        addActor(addActor), popActors(false) {}

    void stopRejoins() final { rejoins = Future<Void>(); }

    void addref() final { ReferenceCounted<TagPartitionedLogSystem>::addref(); }

    void delref() final { ReferenceCounted<TagPartitionedLogSystem>::delref(); }

    std::string describe() const final {
        std::string result;
        for (int i = 0; i < tLogs.size(); i++) {
            result += format("%d: ", i);
            for (int j = 0; j < tLogs[i]->logServers.size(); j++) {
                result += tLogs[i]->logServers[j]->get().id().toString() +
                          ((j == tLogs[i]->logServers.size() - 1) ? " " : ", ");
            }
        }
        return result;
    }

    UID getDebugID() const final { return dbgid; }

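    // Pseudo localities (e.g. tagLocalityLogRouterMapped, tagLocalityBackup) let several consumers share the
    // physical log router tags: each consumer pops under its own pseudo tag, and popPseudoLocalityTag() only
    // advances the underlying tag to the minimum popped version across all pseudo localities.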
    void addPseudoLocality(int8_t locality) {
        ASSERT(locality < 0);
        pseudoLocalities.insert(locality);
        for (uint16_t i = 0; i < logRouterTags; i++) {
            pseudoLocalityPopVersion[Tag(locality, i)] = 0;
        }
    }

    Tag getPseudoPopTag(Tag tag, ProcessClass::ClassType type) const final {
        switch (type) {
        case ProcessClass::LogRouterClass:
            if (tag.locality == tagLocalityLogRouter) {
                ASSERT(pseudoLocalities.count(tagLocalityLogRouterMapped) > 0);
                tag.locality = tagLocalityLogRouterMapped;
            }
            break;

        case ProcessClass::BackupClass:
            if (tag.locality == tagLocalityLogRouter) {
                ASSERT(pseudoLocalities.count(tagLocalityBackup) > 0);
                tag.locality = tagLocalityBackup;
            }
            break;

        default: // This should be an error at caller site.
            break;
        }
        return tag;
    }

    bool hasPseudoLocality(int8_t locality) const final { return pseudoLocalities.count(locality) > 0; }

    // Return the min version of all pseudoLocalities, i.e., logRouter and backupTag
    Version popPseudoLocalityTag(Tag tag, Version upTo) final {
        ASSERT(isPseudoLocality(tag.locality) && hasPseudoLocality(tag.locality));

        Version& localityVersion = pseudoLocalityPopVersion[tag];
        localityVersion = std::max(localityVersion, upTo);
        Version minVersion = localityVersion;
        // Why do we need to use the minimum popped version among all tags? Reason: for example,
        // two pseudo tags pop up to 100 and 150, respectively. It's only safe to pop min(100, 150),
        // because [101, 150) is still needed by the other pseudo tag.
        for (const int8_t locality : pseudoLocalities) {
            minVersion = std::min(minVersion, pseudoLocalityPopVersion[Tag(locality, tag.id)]);
        }
        // TraceEvent("TLogPopPseudoTag", dbgid).detail("Tag", tag.toString()).detail("Version", upTo).detail("PopVersion", minVersion);
        return minVersion;
    }

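    // Factory helpers: fromLogSystemConfig() instantiates a log system from a broadcast LogSystemConfig,
    // while fromOldLogSystemConfig() builds a view based on the previous generation (lsConf.oldTLogs[0])
    // and marks it stopped.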
    static Future<Void> recoverAndEndEpoch(Reference<AsyncVar<Reference<ILogSystem>>> const& outLogSystem,
                                           UID const& dbgid,
                                           DBCoreState const& oldState,
                                           FutureStream<TLogRejoinRequest> const& rejoins,
                                           LocalityData const& locality,
                                           bool* forceRecovery) {
        return epochEnd(outLogSystem, dbgid, oldState, rejoins, locality, forceRecovery);
    }

    static Reference<ILogSystem> fromLogSystemConfig(UID const& dbgid,
                                                     LocalityData const& locality,
                                                     LogSystemConfig const& lsConf,
                                                     bool excludeRemote,
                                                     bool useRecoveredAt,
                                                     Optional<PromiseStream<Future<Void>>> addActor) {
        ASSERT(lsConf.logSystemType == LogSystemType::tagPartitioned ||
               (lsConf.logSystemType == LogSystemType::empty && !lsConf.tLogs.size()));
        // ASSERT(lsConf.epoch == epoch); //< FIXME
        auto logSystem = makeReference<TagPartitionedLogSystem>(dbgid, locality, lsConf.epoch, addActor);

        logSystem->tLogs.reserve(lsConf.tLogs.size());
        logSystem->expectedLogSets = lsConf.expectedLogSets;
        logSystem->logRouterTags = lsConf.logRouterTags;
        logSystem->txsTags = lsConf.txsTags;
        logSystem->recruitmentID = lsConf.recruitmentID;
        logSystem->stopped = lsConf.stopped;
        if (useRecoveredAt) {
            logSystem->recoveredAt = lsConf.recoveredAt;
        }
        logSystem->pseudoLocalities = lsConf.pseudoLocalities;
        for (const TLogSet& tLogSet : lsConf.tLogs) {
            if (!excludeRemote || tLogSet.isLocal) {
                logSystem->tLogs.push_back(makeReference<LogSet>(tLogSet));
            }
        }

        for (const auto& oldTlogConf : lsConf.oldTLogs) {
            logSystem->oldLogData.emplace_back(oldTlogConf);
            //TraceEvent("BWFromLSConf")
            //    .detail("Epoch", logSystem->oldLogData.back().epoch)
            //    .detail("Version", logSystem->oldLogData.back().epochEnd);
        }

        logSystem->logSystemType = lsConf.logSystemType;
        logSystem->oldestBackupEpoch = lsConf.oldestBackupEpoch;
        return logSystem;
    }

    static Reference<ILogSystem> fromOldLogSystemConfig(UID const& dbgid,
                                                        LocalityData const& locality,
                                                        LogSystemConfig const& lsConf) {
        ASSERT(lsConf.logSystemType == LogSystemType::tagPartitioned ||
               (lsConf.logSystemType == LogSystemType::empty && !lsConf.tLogs.size()));
        // ASSERT(lsConf.epoch == epoch); //< FIXME
        const LogEpoch e = lsConf.oldTLogs.size() > 0 ? lsConf.oldTLogs[0].epoch : 0;
        auto logSystem = makeReference<TagPartitionedLogSystem>(dbgid, locality, e);

        if (lsConf.oldTLogs.size()) {
            for (const TLogSet& tLogSet : lsConf.oldTLogs[0].tLogs) {
                logSystem->tLogs.push_back(makeReference<LogSet>(tLogSet));
            }
            logSystem->logRouterTags = lsConf.oldTLogs[0].logRouterTags;
            logSystem->txsTags = lsConf.oldTLogs[0].txsTags;
            // logSystem->epochEnd = lsConf.oldTLogs[0].epochEnd;

            for (int i = 1; i < lsConf.oldTLogs.size(); i++) {
                logSystem->oldLogData.emplace_back(lsConf.oldTLogs[i]);
            }
        }
        logSystem->logSystemType = lsConf.logSystemType;
        logSystem->stopped = true;
        logSystem->pseudoLocalities = lsConf.pseudoLocalities;

        return logSystem;
    }

    // Convert TagPartitionedLogSystem to DBCoreState and overwrite the input newState as the return value
    void toCoreState(DBCoreState& newState) final {
        if (recoveryComplete.isValid() && recoveryComplete.isError())
            throw recoveryComplete.getError();

        if (remoteRecoveryComplete.isValid() && remoteRecoveryComplete.isError())
            throw remoteRecoveryComplete.getError();

        newState.tLogs.clear();
        newState.logRouterTags = logRouterTags;
        newState.txsTags = txsTags;
        newState.pseudoLocalities = pseudoLocalities;
        for (const auto& t : tLogs) {
            if (t->logServers.size()) {
                newState.tLogs.emplace_back(*t);
                newState.tLogs.back().tLogLocalities.clear();
                for (const auto& log : t->logServers) {
                    newState.tLogs.back().tLogLocalities.push_back(log->get().interf().filteredLocality);
                }
            }
        }

        newState.oldTLogData.clear();
        if (!recoveryComplete.isValid() || !recoveryComplete.isReady() ||
            (repopulateRegionAntiQuorum == 0 &&
             (!remoteRecoveryComplete.isValid() || !remoteRecoveryComplete.isReady())) ||
            epoch != oldestBackupEpoch) {
            for (const auto& oldData : oldLogData) {
                newState.oldTLogData.emplace_back(oldData);
                TraceEvent("BWToCore")
                    .detail("Epoch", newState.oldTLogData.back().epoch)
                    .detail("TotalTags", newState.oldTLogData.back().logRouterTags)
                    .detail("BeginVersion", newState.oldTLogData.back().epochBegin)
                    .detail("EndVersion", newState.oldTLogData.back().epochEnd);
            }
        }

        newState.logSystemType = logSystemType;
    }

    bool remoteStorageRecovered() final { return remoteRecoveryComplete.isValid() && remoteRecoveryComplete.isReady(); }

    Future<Void> onCoreStateChanged() final {
        std::vector<Future<Void>> changes;
        changes.push_back(Never());
        if (recoveryComplete.isValid() && !recoveryComplete.isReady()) {
            changes.push_back(recoveryComplete);
        }
        if (remoteRecovery.isValid() && !remoteRecovery.isReady()) {
            changes.push_back(remoteRecovery);
        }
        if (remoteRecoveryComplete.isValid() && !remoteRecoveryComplete.isReady()) {
            changes.push_back(remoteRecoveryComplete);
        }
        changes.push_back(backupWorkerChanged.onTrigger()); // changes to oldestBackupEpoch
        return waitForAny(changes);
    }

    void coreStateWritten(DBCoreState const& newState) final {
        if (!newState.oldTLogData.size()) {
            recoveryCompleteWrittenToCoreState.set(true);
        }
        for (auto& t : newState.tLogs) {
            if (!t.isLocal) {
                TraceEvent("RemoteLogsWritten", dbgid);
                remoteLogsWrittenToCoreState = true;
                break;
            }
        }
    }

    Future<Void> onError() final { return onError_internal(this); }

    ACTOR static Future<Void> onError_internal(TagPartitionedLogSystem* self) {
        // Never returns normally, but throws an error if the subsystem stops working
        loop {
            std::vector<Future<Void>> failed;
            std::vector<Future<Void>> backupFailed(1, Never());
            std::vector<Future<Void>> changes;

            for (auto& it : self->tLogs) {
                for (auto& t : it->logServers) {
                    if (t->get().present()) {
                        failed.push_back(waitFailureClient(t->get().interf().waitFailure,
                                                           SERVER_KNOBS->TLOG_TIMEOUT,
                                                           -SERVER_KNOBS->TLOG_TIMEOUT /
                                                               SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY,
                                                           /*trace=*/true));
                    } else {
                        changes.push_back(t->onChange());
                    }
                }
                for (auto& t : it->logRouters) {
                    if (t->get().present()) {
                        failed.push_back(waitFailureClient(t->get().interf().waitFailure,
                                                           SERVER_KNOBS->TLOG_TIMEOUT,
                                                           -SERVER_KNOBS->TLOG_TIMEOUT /
                                                               SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY,
                                                           /*trace=*/true));
                    } else {
                        changes.push_back(t->onChange());
                    }
                }
                for (const auto& worker : it->backupWorkers) {
                    if (worker->get().present()) {
                        backupFailed.push_back(waitFailureClient(worker->get().interf().waitFailure,
                                                                 SERVER_KNOBS->BACKUP_TIMEOUT,
                                                                 -SERVER_KNOBS->BACKUP_TIMEOUT /
                                                                     SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY,
                                                                 /*trace=*/true));
                    } else {
                        changes.push_back(worker->onChange());
                    }
                }
            }

            if (!self->recoveryCompleteWrittenToCoreState.get()) {
                for (auto& old : self->oldLogData) {
                    for (auto& it : old.tLogs) {
                        for (auto& t : it->logRouters) {
                            if (t->get().present()) {
                                failed.push_back(waitFailureClient(t->get().interf().waitFailure,
                                                                   SERVER_KNOBS->TLOG_TIMEOUT,
                                                                   -SERVER_KNOBS->TLOG_TIMEOUT /
                                                                       SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY,
                                                                   /*trace=*/true));
                            } else {
                                changes.push_back(t->onChange());
                            }
                        }
                    }
                    // Monitor changes of backup workers for old epochs.
                    for (const auto& worker : old.tLogs[0]->backupWorkers) {
                        if (worker->get().present()) {
                            backupFailed.push_back(
                                waitFailureClient(worker->get().interf().waitFailure,
                                                  SERVER_KNOBS->BACKUP_TIMEOUT,
                                                  -SERVER_KNOBS->BACKUP_TIMEOUT /
                                                      SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY,
                                                  /*trace=*/true));
                        } else {
                            changes.push_back(worker->onChange());
                        }
                    }
                }
            }

            if (self->hasRemoteServers && (!self->remoteRecovery.isReady() || self->remoteRecovery.isError())) {
                changes.push_back(self->remoteRecovery);
            }

            changes.push_back(self->recoveryCompleteWrittenToCoreState.onChange());
            changes.push_back(self->backupWorkerChanged.onTrigger());

            ASSERT(failed.size() >= 1);
            wait(quorum(changes, 1) || tagError<Void>(quorum(failed, 1), master_tlog_failed()) ||
                 tagError<Void>(quorum(backupFailed, 1), master_backup_worker_failed()));
        }
    }

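    // Connection-reset heuristic for slow TLog pushes: recordPushMetrics() classifies each commit reply as
    // fast or slow (latency above PUSH_MAX_LATENCY), and pushResetChecker() resets the peer connection when,
    // over a PUSH_STATS_INTERVAL window, slow replies exceed both PUSH_STATS_SLOW_AMOUNT and
    // PUSH_STATS_SLOW_RATIO.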
    ACTOR static Future<Void> pushResetChecker(Reference<ConnectionResetInfo> self, NetworkAddress addr) {
        self->slowReplies = 0;
        self->fastReplies = 0;
        wait(delay(SERVER_KNOBS->PUSH_STATS_INTERVAL));
        TraceEvent("SlowPushStats")
            .detail("PeerAddress", addr)
            .detail("SlowReplies", self->slowReplies)
            .detail("FastReplies", self->fastReplies);
        if (self->slowReplies >= SERVER_KNOBS->PUSH_STATS_SLOW_AMOUNT &&
            self->slowReplies / double(self->slowReplies + self->fastReplies) >= SERVER_KNOBS->PUSH_STATS_SLOW_RATIO) {
            FlowTransport::transport().resetConnection(addr);
            self->lastReset = now();
        }
        return Void();
    }

    ACTOR static Future<TLogCommitReply> recordPushMetrics(Reference<ConnectionResetInfo> self,
                                                           NetworkAddress addr,
                                                           Future<TLogCommitReply> in) {
        state double startTime = now();
        TLogCommitReply t = wait(in);
        if (now() - self->lastReset > SERVER_KNOBS->PUSH_RESET_INTERVAL) {
            if (now() - startTime > SERVER_KNOBS->PUSH_MAX_LATENCY) {
                if (self->resetCheck.isReady()) {
                    self->resetCheck = pushResetChecker(self, addr);
                }
                self->slowReplies++;
            } else {
                self->fastReplies++;
            }
        }
        return t;
    }

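    // Sends one commit to every local TLog set. Within a set of N logServers with write anti-quorum A, the
    // set's result completes after N - A acknowledgements (e.g. 5 logServers with tLogWriteAntiQuorum = 1
    // require 4 acks). The returned future yields the minimum acknowledged version once every local set has
    // reached its quorum (see minVersionWhenReady above).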
    Future<Version> push(Version prevVersion,
                         Version version,
                         Version knownCommittedVersion,
                         Version minKnownCommittedVersion,
                         LogPushData& data,
                         SpanID const& spanContext,
                         Optional<UID> debugID) final {
        // FIXME: Randomize request order as in LegacyLogSystem?
        vector<Future<Void>> quorumResults;
        vector<Future<TLogCommitReply>> allReplies;
        int location = 0;
        Span span("TPLS:push"_loc, spanContext);
        for (auto& it : tLogs) {
            if (it->isLocal && it->logServers.size()) {
                if (it->connectionResetTrackers.size() == 0) {
                    for (int i = 0; i < it->logServers.size(); i++) {
                        it->connectionResetTrackers.push_back(makeReference<ConnectionResetInfo>());
                    }
                }
                vector<Future<Void>> tLogCommitResults;
                for (int loc = 0; loc < it->logServers.size(); loc++) {
                    Standalone<StringRef> msg = data.getMessages(location);
                    allReplies.push_back(recordPushMetrics(
                        it->connectionResetTrackers[loc],
                        it->logServers[loc]->get().interf().address(),
                        it->logServers[loc]->get().interf().commit.getReply(TLogCommitRequest(spanContext,
                                                                                              msg.arena(),
                                                                                              prevVersion,
                                                                                              version,
                                                                                              knownCommittedVersion,
                                                                                              minKnownCommittedVersion,
                                                                                              msg,
                                                                                              debugID),
                                                                            TaskPriority::ProxyTLogCommitReply)));
                    Future<Void> commitSuccess = success(allReplies.back());
                    addActor.get().send(commitSuccess);
                    tLogCommitResults.push_back(commitSuccess);
                    location++;
                }
                quorumResults.push_back(quorum(tLogCommitResults, tLogCommitResults.size() - it->tLogWriteAntiQuorum));
            }
        }

        return minVersionWhenReady(waitForAll(quorumResults), allReplies);
    }

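    // Builds a cursor over [begin, end) for `tag` across the local log sets. If `begin` predates the current
    // generation's start version, cursors over older generations from oldLogData are chained behind it in a
    // MultiCursor, with epochEnds recording the version at which each hand-off happens.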
    Reference<IPeekCursor> peekAll(UID dbgid, Version begin, Version end, Tag tag, bool parallelGetMore) {
        int bestSet = 0;
        std::vector<Reference<LogSet>> localSets;
        Version lastBegin = 0;
        bool foundSpecial = false;
        for (auto& log : tLogs) {
            if (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded) {
                foundSpecial = true;
            }
            if (log->isLocal && log->logServers.size() &&
                (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded ||
                 log->locality == tag.locality || tag == txsTag || tag.locality == tagLocalityTxs ||
                 tag.locality == tagLocalityLogRouter ||
                 ((tag.locality == tagLocalityUpgraded || tag == cacheTag) && log->locality != tagLocalitySatellite))) {
                lastBegin = std::max(lastBegin, log->startVersion);
                localSets.push_back(log);
                if (log->locality != tagLocalitySatellite) {
                    bestSet = localSets.size() - 1;
                }
            }
        }

        if (!localSets.size()) {
            lastBegin = end;
        }

        if (begin >= lastBegin && localSets.size()) {
            TraceEvent("TLogPeekAllCurrentOnly", dbgid)
                .detail("Tag", tag.toString())
                .detail("Begin", begin)
                .detail("End", end)
                .detail("BestLogs", localSets[bestSet]->logServerString());
            return makeReference<ILogSystem::SetPeekCursor>(
                localSets, bestSet, localSets[bestSet]->bestLocationFor(tag), tag, begin, end, parallelGetMore);
        } else {
            std::vector<Reference<ILogSystem::IPeekCursor>> cursors;
            std::vector<LogMessageVersion> epochEnds;

            if (lastBegin < end && localSets.size()) {
                TraceEvent("TLogPeekAllAddingCurrent", dbgid)
                    .detail("Tag", tag.toString())
                    .detail("Begin", begin)
                    .detail("End", end)
                    .detail("BestLogs", localSets[bestSet]->logServerString());
                cursors.push_back(makeReference<ILogSystem::SetPeekCursor>(
                    localSets, bestSet, localSets[bestSet]->bestLocationFor(tag), tag, lastBegin, end, parallelGetMore));
            }
            for (int i = 0; begin < lastBegin; i++) {
                if (i == oldLogData.size()) {
                    if (tag == txsTag || tag.locality == tagLocalityTxs || tag == cacheTag) {
                        break;
                    }
                    TraceEvent("TLogPeekAllDead", dbgid)
                        .detail("Tag", tag.toString())
                        .detail("Begin", begin)
                        .detail("End", end)
                        .detail("LastBegin", lastBegin)
                        .detail("OldLogDataSize", oldLogData.size());
                    return makeReference<ILogSystem::ServerPeekCursor>(
                        Reference<AsyncVar<OptionalInterface<TLogInterface>>>(), tag, begin, getPeekEnd(), false, false);
                }

                int bestOldSet = 0;
                std::vector<Reference<LogSet>> localOldSets;
                Version thisBegin = begin;
                bool thisSpecial = false;
                for (auto& log : oldLogData[i].tLogs) {
                    if (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded) {
                        thisSpecial = true;
                    }
                    if (log->isLocal && log->logServers.size() &&
                        (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded ||
                         log->locality == tag.locality || tag == txsTag || tag.locality == tagLocalityTxs ||
                         tag.locality == tagLocalityLogRouter ||
                         ((tag.locality == tagLocalityUpgraded || tag == cacheTag) &&
                          log->locality != tagLocalitySatellite))) {
                        thisBegin = std::max(thisBegin, log->startVersion);
                        localOldSets.push_back(log);
                        if (log->locality != tagLocalitySatellite) {
                            bestOldSet = localOldSets.size() - 1;
                        }
                    }
                }

                if (!localOldSets.size()) {
                    TraceEvent("TLogPeekAllNoLocalSets", dbgid)
                        .detail("Tag", tag.toString())
                        .detail("Begin", begin)
                        .detail("End", end)
                        .detail("LastBegin", lastBegin);
                    if (!cursors.size() && !foundSpecial) {
                        continue;
                    }
                    return makeReference<ILogSystem::ServerPeekCursor>(
                        Reference<AsyncVar<OptionalInterface<TLogInterface>>>(), tag, begin, getPeekEnd(), false, false);
                }
                if (thisSpecial) {
                    foundSpecial = true;
                }

                if (thisBegin < lastBegin) {
                    if (thisBegin < end) {
                        TraceEvent("TLogPeekAllAddingOld", dbgid)
                            .detail("Tag", tag.toString())
                            .detail("Begin", begin)
                            .detail("End", end)
                            .detail("BestLogs", localOldSets[bestOldSet]->logServerString())
                            .detail("LastBegin", lastBegin)
                            .detail("ThisBegin", thisBegin);
                        cursors.push_back(
                            makeReference<ILogSystem::SetPeekCursor>(localOldSets,
                                                                     bestOldSet,
                                                                     localOldSets[bestOldSet]->bestLocationFor(tag),
                                                                     tag,
                                                                     thisBegin,
                                                                     std::min(lastBegin, end),
                                                                     parallelGetMore));
                        epochEnds.push_back(LogMessageVersion(std::min(lastBegin, end)));
                    }
                    lastBegin = thisBegin;
                }
            }

            return makeReference<ILogSystem::MultiCursor>(cursors, epochEnds);
        }
    }

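    // Peek path for remote (log router) tags: reads from the log routers of the single set that currently
    // has them, falling back to the log routers recorded in older generations when `begin` is older than the
    // current set's start version.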
    Reference<IPeekCursor> peekRemote(UID dbgid, Version begin, Optional<Version> end, Tag tag, bool parallelGetMore) {
        int bestSet = -1;
        Version lastBegin = recoveredAt.present() ? recoveredAt.get() + 1 : 0;
        for (int t = 0; t < tLogs.size(); t++) {
            if (tLogs[t]->isLocal) {
                lastBegin = std::max(lastBegin, tLogs[t]->startVersion);
            }

            if (tLogs[t]->logRouters.size()) {
                ASSERT(bestSet == -1);
                bestSet = t;
            }
        }
        if (bestSet == -1) {
            TraceEvent("TLogPeekRemoteNoBestSet", dbgid)
                .detail("Tag", tag.toString())
                .detail("Begin", begin)
                .detail("End", end.present() ? end.get() : getPeekEnd());
            return makeReference<ILogSystem::ServerPeekCursor>(
                Reference<AsyncVar<OptionalInterface<TLogInterface>>>(), tag, begin, getPeekEnd(), false, parallelGetMore);
        }
        if (begin >= lastBegin) {
            TraceEvent("TLogPeekRemoteBestOnly", dbgid)
                .detail("Tag", tag.toString())
                .detail("Begin", begin)
                .detail("End", end.present() ? end.get() : getPeekEnd())
                .detail("BestSet", bestSet)
                .detail("BestSetStart", lastBegin)
                .detail("LogRouterIds", tLogs[bestSet]->logRouterString());
            return makeReference<ILogSystem::BufferedCursor>(
                tLogs[bestSet]->logRouters, tag, begin, end.present() ? end.get() + 1 : getPeekEnd(), parallelGetMore);
        } else {
            std::vector<Reference<ILogSystem::IPeekCursor>> cursors;
            std::vector<LogMessageVersion> epochEnds;
            TraceEvent("TLogPeekRemoteAddingBest", dbgid)
                .detail("Tag", tag.toString())
                .detail("Begin", begin)
                .detail("End", end.present() ? end.get() : getPeekEnd())
                .detail("BestSet", bestSet)
                .detail("BestSetStart", lastBegin)
                .detail("LogRouterIds", tLogs[bestSet]->logRouterString());
            cursors.push_back(makeReference<ILogSystem::BufferedCursor>(tLogs[bestSet]->logRouters,
                                                                        tag,
                                                                        lastBegin,
                                                                        end.present() ? end.get() + 1 : getPeekEnd(),
                                                                        parallelGetMore));
            int i = 0;
            while (begin < lastBegin) {
                if (i == oldLogData.size()) {
                    TraceEvent("TLogPeekRemoteDead", dbgid)
                        .detail("Tag", tag.toString())
                        .detail("Begin", begin)
                        .detail("End", end.present() ? end.get() : getPeekEnd())
                        .detail("LastBegin", lastBegin)
                        .detail("OldLogDataSize", oldLogData.size());
                    return makeReference<ILogSystem::ServerPeekCursor>(
                        Reference<AsyncVar<OptionalInterface<TLogInterface>>>(), tag, begin, getPeekEnd(), false, parallelGetMore);
                }

                int bestOldSet = -1;
                Version thisBegin = begin;
                for (int t = 0; t < oldLogData[i].tLogs.size(); t++) {
                    if (oldLogData[i].tLogs[t]->isLocal) {
                        thisBegin = std::max(thisBegin, oldLogData[i].tLogs[t]->startVersion);
                    }

                    if (oldLogData[i].tLogs[t]->logRouters.size()) {
                        ASSERT(bestOldSet == -1);
                        bestOldSet = t;
                    }
                }
                if (bestOldSet == -1) {
                    TraceEvent("TLogPeekRemoteNoOldBestSet", dbgid)
                        .detail("Tag", tag.toString())
                        .detail("Begin", begin)
                        .detail("End", end.present() ? end.get() : getPeekEnd());
                    return makeReference<ILogSystem::ServerPeekCursor>(
                        Reference<AsyncVar<OptionalInterface<TLogInterface>>>(), tag, begin, getPeekEnd(), false, parallelGetMore);
                }

                if (thisBegin < lastBegin) {
                    TraceEvent("TLogPeekRemoteAddingOldBest", dbgid)
                        .detail("Tag", tag.toString())
                        .detail("Begin", begin)
                        .detail("End", end.present() ? end.get() : getPeekEnd())
                        .detail("BestOldSet", bestOldSet)
                        .detail("LogRouterIds", oldLogData[i].tLogs[bestOldSet]->logRouterString())
                        .detail("LastBegin", lastBegin)
                        .detail("ThisBegin", thisBegin)
                        .detail("BestStartVer", oldLogData[i].tLogs[bestOldSet]->startVersion);
                    cursors.push_back(makeReference<ILogSystem::BufferedCursor>(
                        oldLogData[i].tLogs[bestOldSet]->logRouters, tag, thisBegin, lastBegin, parallelGetMore));
                    epochEnds.emplace_back(lastBegin);
                    lastBegin = thisBegin;
                }
                i++;
            }

            return makeReference<ILogSystem::MultiCursor>(cursors, epochEnds);
        }
    }

    Reference<IPeekCursor> peek(UID dbgid, Version begin, Optional<Version> end, Tag tag, bool parallelGetMore) final {
        if (!tLogs.size()) {
            TraceEvent("TLogPeekNoLogSets", dbgid).detail("Tag", tag.toString()).detail("Begin", begin);
            return makeReference<ILogSystem::ServerPeekCursor>(
                Reference<AsyncVar<OptionalInterface<TLogInterface>>>(), tag, begin, getPeekEnd(), false, false);
        }

        if (tag.locality == tagLocalityRemoteLog) {
            return peekRemote(dbgid, begin, end, tag, parallelGetMore);
        } else {
            return peekAll(dbgid, begin, getPeekEnd(), tag, parallelGetMore);
        }
    }

    Reference<IPeekCursor> peek(UID dbgid,
                                Version begin,
                                Optional<Version> end,
                                std::vector<Tag> tags,
                                bool parallelGetMore) final {
        if (tags.empty()) {
            TraceEvent("TLogPeekNoTags", dbgid).detail("Begin", begin);
            return makeReference<ILogSystem::ServerPeekCursor>(
                Reference<AsyncVar<OptionalInterface<TLogInterface>>>(), invalidTag, begin, getPeekEnd(), false, false);
        }

        if (tags.size() == 1) {
            return peek(dbgid, begin, end, tags[0], parallelGetMore);
        }

        std::vector<Reference<ILogSystem::IPeekCursor>> cursors;
        cursors.reserve(tags.size());
        for (auto tag : tags) {
            cursors.push_back(peek(dbgid, begin, end, tag, parallelGetMore));
        }
        return makeReference<ILogSystem::BufferedCursor>(cursors,
                                                         begin,
                                                         end.present() ? end.get() + 1 : getPeekEnd(),
                                                         true,
                                                         tLogs[0]->locality == tagLocalityUpgraded,
                                                         false);
    }

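    // Peek path for a locality-matched tag served by the local TLogs. useMergePeekCursors selects a
    // MergedPeekCursor over the set's logServers instead of a single ServerPeekCursor; worker_removed() is
    // thrown when the requested range can no longer be served from the local sets.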
    Reference<IPeekCursor> peekLocal(UID dbgid,
                                     Tag tag,
                                     Version begin,
                                     Version end,
                                     bool useMergePeekCursors,
                                     int8_t peekLocality = tagLocalityInvalid) {
        if (tag.locality >= 0 || tag.locality == tagLocalityUpgraded || tag.locality == tagLocalitySpecial) {
            peekLocality = tag.locality;
        }
        ASSERT(peekLocality >= 0 || peekLocality == tagLocalityUpgraded || tag.locality == tagLocalitySpecial);

        int bestSet = -1;
        bool foundSpecial = false;
        int logCount = 0;
        for (int t = 0; t < tLogs.size(); t++) {
            if (tLogs[t]->logServers.size() && tLogs[t]->locality != tagLocalitySatellite) {
                logCount++;
            }
            if (tLogs[t]->logServers.size() &&
                (tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded ||
                 tLogs[t]->locality == peekLocality || peekLocality == tagLocalityUpgraded ||
                 peekLocality == tagLocalitySpecial)) {
                if (tLogs[t]->locality == tagLocalitySpecial || tLogs[t]->locality == tagLocalityUpgraded) {
                    foundSpecial = true;
                }
                bestSet = t;
                break;
            }
        }
        if (bestSet == -1) {
            TraceEvent("TLogPeekLocalNoBestSet", dbgid)
                .detail("Tag", tag.toString())
                .detail("Begin", begin)
                .detail("End", end)
                .detail("LogCount", logCount);
            if (useMergePeekCursors || logCount > 1) {
                throw worker_removed();
            } else {
                return makeReference<ILogSystem::ServerPeekCursor>(
                    Reference<AsyncVar<OptionalInterface<TLogInterface>>>(), tag, begin, getPeekEnd(), false, false);
            }
        }

        if (begin >= tLogs[bestSet]->startVersion) {
            TraceEvent("TLogPeekLocalBestOnly", dbgid)
                .detail("Tag", tag.toString())
                .detail("Begin", begin)
                .detail("End", end)
                .detail("BestSet", bestSet)
                .detail("BestSetStart", tLogs[bestSet]->startVersion)
                .detail("LogId", tLogs[bestSet]->logServers[tLogs[bestSet]->bestLocationFor(tag)]->get().id());
            if (useMergePeekCursors) {
                return makeReference<ILogSystem::MergedPeekCursor>(tLogs[bestSet]->logServers,
                                                                   tLogs[bestSet]->bestLocationFor(tag),
                                                                   tLogs[bestSet]->logServers.size() + 1 -
                                                                       tLogs[bestSet]->tLogReplicationFactor,
                                                                   tag,
                                                                   begin,
                                                                   end,
                                                                   true,
                                                                   tLogs[bestSet]->tLogLocalities,
                                                                   tLogs[bestSet]->tLogPolicy,
                                                                   tLogs[bestSet]->tLogReplicationFactor);
            } else {
                return makeReference<ILogSystem::ServerPeekCursor>(
                    tLogs[bestSet]->logServers[tLogs[bestSet]->bestLocationFor(tag)], tag, begin, end, false, false);
            }
        } else {
            std::vector<Reference<ILogSystem::IPeekCursor>> cursors;
            std::vector<LogMessageVersion> epochEnds;

            if (tLogs[bestSet]->startVersion < end) {
                TraceEvent("TLogPeekLocalAddingBest", dbgid)
                    .detail("Tag", tag.toString())
                    .detail("Begin", begin)
                    .detail("End", end)
                    .detail("BestSet", bestSet)
                    .detail("BestSetStart", tLogs[bestSet]->startVersion)
                    .detail("LogId", tLogs[bestSet]->logServers[tLogs[bestSet]->bestLocationFor(tag)]->get().id());
                if (useMergePeekCursors) {
                    cursors.push_back(makeReference<ILogSystem::MergedPeekCursor>(
                        tLogs[bestSet]->logServers,
                        tLogs[bestSet]->bestLocationFor(tag),
                        tLogs[bestSet]->logServers.size() + 1 - tLogs[bestSet]->tLogReplicationFactor,
                        tag,
                        tLogs[bestSet]->startVersion,
                        end,
                        true,
                        tLogs[bestSet]->tLogLocalities,
                        tLogs[bestSet]->tLogPolicy,
                        tLogs[bestSet]->tLogReplicationFactor));
                } else {
                    cursors.push_back(makeReference<ILogSystem::ServerPeekCursor>(
                        tLogs[bestSet]->logServers[tLogs[bestSet]->bestLocationFor(tag)],
                        tag,
                        tLogs[bestSet]->startVersion,
                        end,
                        false,
                        false));
                }
            }
            Version lastBegin = tLogs[bestSet]->startVersion;
            for (int i = 0; begin < lastBegin; i++) {
                if (i == oldLogData.size()) {
                    if ((tag == txsTag || tag.locality == tagLocalityTxs) && cursors.size()) {
                        break;
                    }
                    TraceEvent("TLogPeekLocalDead", dbgid)
                        .detail("Tag", tag.toString())
                        .detail("Begin", begin)
                        .detail("End", end)
                        .detail("LastBegin", lastBegin)
                        .detail("OldLogDataSize", oldLogData.size());
                    throw worker_removed();
                }

                int bestOldSet = -1;
                logCount = 0;
                bool nextFoundSpecial = false;
                for (int t = 0; t < oldLogData[i].tLogs.size(); t++) {
                    if (oldLogData[i].tLogs[t]->logServers.size() &&
                        oldLogData[i].tLogs[t]->locality != tagLocalitySatellite) {
                        logCount++;
                    }
                    if (oldLogData[i].tLogs[t]->logServers.size() &&
                        (oldLogData[i].tLogs[t]->locality == tagLocalitySpecial ||
                         oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded ||
                         oldLogData[i].tLogs[t]->locality == peekLocality || peekLocality == tagLocalityUpgraded ||
                         peekLocality == tagLocalitySpecial)) {
                        if (oldLogData[i].tLogs[t]->locality == tagLocalitySpecial ||
                            oldLogData[i].tLogs[t]->locality == tagLocalityUpgraded) {
                            nextFoundSpecial = true;
                        }
                        if (foundSpecial && !oldLogData[i].tLogs[t]->isLocal) {
                            TraceEvent("TLogPeekLocalRemoteBeforeSpecial", dbgid)
                                .detail("Tag", tag.toString())
                                .detail("Begin", begin)
                                .detail("End", end)
                                .detail("LastBegin", lastBegin)
                                .detail("OldLogDataSize", oldLogData.size())
                                .detail("Idx", i);
                            throw worker_removed();
                        }
                        bestOldSet = t;
                        break;
                    }
                }

                if (bestOldSet == -1) {
                    TraceEvent("TLogPeekLocalNoBestSet", dbgid)
                        .detail("Tag", tag.toString())
                        .detail("Begin", begin)
                        .detail("End", end)
                        .detail("LastBegin", lastBegin)
                        .detail("OldLogDataSize", oldLogData.size())
                        .detail("Idx", i)
                        .detail("LogRouterTags", oldLogData[i].logRouterTags)
                        .detail("LogCount", logCount)
                        .detail("FoundSpecial", foundSpecial);
                    if (oldLogData[i].logRouterTags == 0 || logCount > 1 || foundSpecial) {
                        throw worker_removed();
                    }
                    continue;
                }

                foundSpecial = nextFoundSpecial;

                Version thisBegin = std::max(oldLogData[i].tLogs[bestOldSet]->startVersion, begin);
                if (thisBegin < lastBegin) {
                    if (thisBegin < end) {
                        TraceEvent("TLogPeekLocalAddingOldBest", dbgid)
                            .detail("Tag", tag.toString())
                            .detail("Begin", begin)
                            .detail("End", end)
                            .detail("BestOldSet", bestOldSet)
                            .detail("LogServers", oldLogData[i].tLogs[bestOldSet]->logServerString())
                            .detail("ThisBegin", thisBegin)
                            .detail("LastBegin", lastBegin);
                        // detail("LogId",
                        // oldLogData[i].tLogs[bestOldSet]->logServers[tLogs[bestOldSet]->bestLocationFor( tag
                        // )]->get().id());
                        cursors.push_back(makeReference<ILogSystem::MergedPeekCursor>(
                            oldLogData[i].tLogs[bestOldSet]->logServers,
                            oldLogData[i].tLogs[bestOldSet]->bestLocationFor(tag),
                            oldLogData[i].tLogs[bestOldSet]->logServers.size() + 1 -
                                oldLogData[i].tLogs[bestOldSet]->tLogReplicationFactor,
                            tag,
                            thisBegin,
                            std::min(lastBegin, end),
                            useMergePeekCursors,
                            oldLogData[i].tLogs[bestOldSet]->tLogLocalities,
                            oldLogData[i].tLogs[bestOldSet]->tLogPolicy,
                            oldLogData[i].tLogs[bestOldSet]->tLogReplicationFactor));
                        epochEnds.emplace_back(std::min(lastBegin, end));
                    }
                    lastBegin = thisBegin;
                }
            }

            return makeReference<ILogSystem::MultiCursor>(cursors, epochEnds);
        }
    }

2021-03-11 02:06:03 +08:00
|
|
|
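    // Returns a cursor over the transaction state store (txs) tags for [begin, end). Depending on "peekLocality"
    // and "localEnd", it peeks all tlog locations (peekAll), only local tlogs (peekLocal), or a combination split
    // at "localEnd", merging the per-tag cursors with a BufferedCursor. If a local peek fails with worker_removed,
    // it falls back to peeking all locations.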
    Reference<IPeekCursor> peekTxs(UID dbgid,
                                   Version begin,
                                   int8_t peekLocality,
                                   Version localEnd,
                                   bool canDiscardPopped) final {
        Version end = getEnd();
        if (!tLogs.size()) {
            TraceEvent("TLogPeekTxsNoLogs", dbgid);
            return makeReference<ILogSystem::ServerPeekCursor>(
                Reference<AsyncVar<OptionalInterface<TLogInterface>>>(), txsTag, begin, end, false, false);
        }
        TraceEvent("TLogPeekTxs", dbgid)
            .detail("Begin", begin)
            .detail("End", end)
            .detail("LocalEnd", localEnd)
            .detail("PeekLocality", peekLocality)
            .detail("CanDiscardPopped", canDiscardPopped);

        int maxTxsTags = txsTags;
        bool needsOldTxs = tLogs[0]->tLogVersion < TLogVersion::V4;
        for (auto& it : oldLogData) {
            maxTxsTags = std::max<int>(maxTxsTags, it.txsTags);
            needsOldTxs = needsOldTxs || it.tLogs[0]->tLogVersion < TLogVersion::V4;
        }

        if (peekLocality < 0 || localEnd == invalidVersion || localEnd <= begin) {
            std::vector<Reference<ILogSystem::IPeekCursor>> cursors;
            cursors.reserve(maxTxsTags);
            for (int i = 0; i < maxTxsTags; i++) {
                cursors.push_back(peekAll(dbgid, begin, end, Tag(tagLocalityTxs, i), true));
            }
            // SOMEDAY: remove once upgrades from 6.2 are no longer supported
            if (needsOldTxs) {
                cursors.push_back(peekAll(dbgid, begin, end, txsTag, true));
            }

            return makeReference<ILogSystem::BufferedCursor>(cursors, begin, end, false, false, canDiscardPopped);
        }

        try {
            if (localEnd >= end) {
                std::vector<Reference<ILogSystem::IPeekCursor>> cursors;
                cursors.reserve(maxTxsTags);
                for (int i = 0; i < maxTxsTags; i++) {
                    cursors.push_back(peekLocal(dbgid, Tag(tagLocalityTxs, i), begin, end, true, peekLocality));
                }
                // SOMEDAY: remove once upgrades from 6.2 are no longer supported
                if (needsOldTxs) {
                    cursors.push_back(peekLocal(dbgid, txsTag, begin, end, true, peekLocality));
                }

                return makeReference<ILogSystem::BufferedCursor>(cursors, begin, end, false, false, canDiscardPopped);
            }

            std::vector<Reference<ILogSystem::IPeekCursor>> cursors;
            std::vector<LogMessageVersion> epochEnds;

            cursors.resize(2);

            std::vector<Reference<ILogSystem::IPeekCursor>> localCursors;
            std::vector<Reference<ILogSystem::IPeekCursor>> allCursors;
            for (int i = 0; i < maxTxsTags; i++) {
                localCursors.push_back(peekLocal(dbgid, Tag(tagLocalityTxs, i), begin, localEnd, true, peekLocality));
                allCursors.push_back(peekAll(dbgid, localEnd, end, Tag(tagLocalityTxs, i), true));
            }
            // SOMEDAY: remove once upgrades from 6.2 are no longer supported
            if (needsOldTxs) {
                localCursors.push_back(peekLocal(dbgid, txsTag, begin, localEnd, true, peekLocality));
                allCursors.push_back(peekAll(dbgid, localEnd, end, txsTag, true));
            }

            cursors[1] = makeReference<ILogSystem::BufferedCursor>(
                localCursors, begin, localEnd, false, false, canDiscardPopped);
            cursors[0] = makeReference<ILogSystem::BufferedCursor>(allCursors, localEnd, end, false, false, false);
            epochEnds.emplace_back(localEnd);

            return makeReference<ILogSystem::MultiCursor>(cursors, epochEnds);
        } catch (Error& e) {
            if (e.code() == error_code_worker_removed) {
                std::vector<Reference<ILogSystem::IPeekCursor>> cursors;
                cursors.reserve(maxTxsTags);
                for (int i = 0; i < maxTxsTags; i++) {
                    cursors.push_back(peekAll(dbgid, begin, end, Tag(tagLocalityTxs, i), true));
                }
                // SOMEDAY: remove once upgrades from 6.2 are no longer supported
                if (needsOldTxs) {
                    cursors.push_back(peekAll(dbgid, begin, end, txsTag, true));
                }

                return makeReference<ILogSystem::BufferedCursor>(cursors, begin, end, false, false, canDiscardPopped);
            }
            throw;
        }
    }

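    // Returns a cursor for a single tag, following that tag's locality history: the newest interval is peeked
    // locally under the current tag, and each older (version, tag) pair in "history" contributes a local cursor
    // for its version range, all stitched together with a MultiCursor.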
    Reference<IPeekCursor> peekSingle(UID dbgid,
                                      Version begin,
                                      Tag tag,
                                      std::vector<std::pair<Version, Tag>> history) final {
        while (history.size() && begin >= history.back().first) {
            history.pop_back();
        }

        if (history.size() == 0) {
            TraceEvent("TLogPeekSingleNoHistory", dbgid).detail("Tag", tag.toString()).detail("Begin", begin);
            return peekLocal(dbgid, tag, begin, getPeekEnd(), false);
        } else {
            std::vector<Reference<ILogSystem::IPeekCursor>> cursors;
            std::vector<LogMessageVersion> epochEnds;

            TraceEvent("TLogPeekSingleAddingLocal", dbgid)
                .detail("Tag", tag.toString())
                .detail("Begin", history[0].first);
            cursors.push_back(peekLocal(dbgid, tag, history[0].first, getPeekEnd(), false));

            for (int i = 0; i < history.size(); i++) {
                TraceEvent("TLogPeekSingleAddingOld", dbgid)
                    .detail("Tag", tag.toString())
                    .detail("HistoryTag", history[i].second.toString())
                    .detail("Begin", i + 1 == history.size() ? begin : std::max(history[i + 1].first, begin))
                    .detail("End", history[i].first);
                cursors.push_back(peekLocal(dbgid,
                                            history[i].second,
                                            i + 1 == history.size() ? begin : std::max(history[i + 1].first, begin),
                                            history[i].first,
                                            false));
                epochEnds.emplace_back(history[i].first);
            }

            return makeReference<ILogSystem::MultiCursor>(cursors, epochEnds);
        }
    }

    // A LogRouter or BackupWorker uses this function to obtain a cursor for peeking tlogs of a generation (i.e., epoch).
    // Specifically, the epoch is determined by looking up "dbgid" in tlog sets of generations.
    // The returned cursor can peek data at the "tag" from the given "begin" version to that epoch's end version or
    // the recovery version for the latest old epoch. For the current epoch, the cursor has no end version.
    Reference<IPeekCursor> peekLogRouter(UID dbgid, Version begin, Tag tag) final {
        bool found = false;
        for (const auto& log : tLogs) {
            found = log->hasLogRouter(dbgid) || log->hasBackupWorker(dbgid);
            if (found) {
                break;
            }
        }
        if (found) {
            if (stopped) {
                std::vector<Reference<LogSet>> localSets;
                // Prefer peeking from satellite logs: they carry the same log-router-tagged mutations as the
                // primary tlogs but serve no other peek load, so using them spreads peek traffic off the primary.
                // Older log sets may have satellite log router tags indexed inconsistently with
                // tag.id % logRouterTags, so the satellite preference is gated on TLogVersion::V4 and the
                // LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED knob.
                int bestPrimarySet = 0;
                int bestSatelliteSet = -1;
                for (auto& log : tLogs) {
                    if (log->isLocal && log->logServers.size()) {
                        TraceEvent("TLogPeekLogRouterLocalSet", dbgid)
                            .detail("Tag", tag.toString())
                            .detail("Begin", begin)
                            .detail("LogServers", log->logServerString());
                        localSets.push_back(log);
                        if (log->locality == tagLocalitySatellite) {
                            bestSatelliteSet = localSets.size() - 1;
                        } else {
                            bestPrimarySet = localSets.size() - 1;
                        }
                    }
                }
                int bestSet = bestPrimarySet;
                if (SERVER_KNOBS->LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED && bestSatelliteSet != -1 &&
                    tLogs[bestSatelliteSet]->tLogVersion >= TLogVersion::V4) {
                    bestSet = bestSatelliteSet;
                }

                TraceEvent("TLogPeekLogRouterSets", dbgid).detail("Tag", tag.toString()).detail("Begin", begin);
                // FIXME: do this merge on one of the logs in the other data center to avoid sending multiple copies
                // across the WAN
                return makeReference<ILogSystem::SetPeekCursor>(
                    localSets, bestSet, localSets[bestSet]->bestLocationFor(tag), tag, begin, getPeekEnd(), true);
            } else {
                int bestPrimarySet = -1;
                int bestSatelliteSet = -1;
                for (int i = 0; i < tLogs.size(); i++) {
                    const auto& log = tLogs[i];
                    if (log->logServers.size() && log->isLocal) {
                        if (log->locality == tagLocalitySatellite) {
                            bestSatelliteSet = i;
                            break;
                        } else {
                            if (bestPrimarySet == -1)
                                bestPrimarySet = i;
                        }
                    }
                }
                int bestSet = bestPrimarySet;
                if (SERVER_KNOBS->LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED && bestSatelliteSet != -1 &&
                    tLogs[bestSatelliteSet]->tLogVersion >= TLogVersion::V4) {
                    bestSet = bestSatelliteSet;
                }

                const auto& log = tLogs[bestSet];
                TraceEvent("TLogPeekLogRouterBestOnly", dbgid)
                    .detail("Tag", tag.toString())
                    .detail("Begin", begin)
                    .detail("LogId", log->logServers[log->bestLocationFor(tag)]->get().id());
                return makeReference<ILogSystem::ServerPeekCursor>(
                    log->logServers[log->bestLocationFor(tag)], tag, begin, getPeekEnd(), false, true);
            }
        }
        bool firstOld = true;
        for (const auto& old : oldLogData) {
            found = false;
            for (const auto& log : old.tLogs) {
                found = log->hasLogRouter(dbgid) || log->hasBackupWorker(dbgid);
                if (found) {
                    break;
                }
            }
            if (found) {
                int bestPrimarySet = 0;
                int bestSatelliteSet = -1;
                std::vector<Reference<LogSet>> localSets;
                for (auto& log : old.tLogs) {
                    if (log->isLocal && log->logServers.size()) {
                        TraceEvent("TLogPeekLogRouterOldLocalSet", dbgid)
                            .detail("Tag", tag.toString())
                            .detail("Begin", begin)
                            .detail("LogServers", log->logServerString());
                        localSets.push_back(log);
                        if (log->locality == tagLocalitySatellite) {
                            bestSatelliteSet = localSets.size() - 1;
                        } else {
                            bestPrimarySet = localSets.size() - 1;
                        }
                    }
                }
                int bestSet = bestPrimarySet;
                if (SERVER_KNOBS->LOG_ROUTER_PEEK_FROM_SATELLITES_PREFERRED && bestSatelliteSet != -1 &&
                    old.tLogs[bestSatelliteSet]->tLogVersion >= TLogVersion::V4) {
                    bestSet = bestSatelliteSet;
                }

                TraceEvent("TLogPeekLogRouterOldSets", dbgid)
                    .detail("Tag", tag.toString())
                    .detail("Begin", begin)
                    .detail("OldEpoch", old.epochEnd)
                    .detail("RecoveredAt", recoveredAt.present() ? recoveredAt.get() : -1)
                    .detail("FirstOld", firstOld);
                // FIXME: do this merge on one of the logs in the other data center to avoid sending multiple copies
                // across the WAN
                return makeReference<ILogSystem::SetPeekCursor>(
                    localSets,
                    bestSet,
                    localSets[bestSet]->bestLocationFor(tag),
                    tag,
                    begin,
                    firstOld && recoveredAt.present() ? recoveredAt.get() + 1 : old.epochEnd,
                    true);
            }
            firstOld = false;
        }
        return makeReference<ILogSystem::ServerPeekCursor>(
            Reference<AsyncVar<OptionalInterface<TLogInterface>>>(), tag, begin, getPeekEnd(), false, false);
    }

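    // Largest known committed version reported by the tlog lock responses gathered during recovery.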
    Version getKnownCommittedVersion() final {
        Version result = invalidVersion;
        for (auto& it : lockResults) {
            auto versions = TagPartitionedLogSystem::getDurableVersion(dbgid, it);
            if (versions.present()) {
                result = std::max(result, versions.get().first);
            }
        }
        return result;
    }

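    // Resolves when the durable version of any lock result changes; never resolves if there are none.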
    Future<Void> onKnownCommittedVersionChange() final {
        std::vector<Future<Void>> result;
        for (auto& it : lockResults) {
            result.push_back(TagPartitionedLogSystem::getDurableVersionChanged(it));
        }
        if (!result.size()) {
            return Never();
        }
        return waitForAny(result);
    }

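    // Pops a log router tag up to "upTo" on every log router of the matching locality, in both the current and the
    // old generations. The target is recorded in outstandingPops; a popFromLog actor is spawned only on the first
    // pop for a given (log, tag) pair and then drains the recorded target.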
    void popLogRouter(Version upTo,
                      Tag tag,
                      Version durableKnownCommittedVersion,
                      int8_t popLocality) { // FIXME: do not need to pop all generations of old logs
        if (!upTo)
            return;
        for (auto& t : tLogs) {
            if (t->locality == popLocality) {
                for (auto& log : t->logRouters) {
                    Version prev = outstandingPops[std::make_pair(log->get().id(), tag)].first;
                    if (prev < upTo)
                        outstandingPops[std::make_pair(log->get().id(), tag)] =
                            std::make_pair(upTo, durableKnownCommittedVersion);
                    if (prev == 0) {
                        popActors.add(popFromLog(
                            this, log, tag, 0.0)); // Fast pop time because log routers can only hold 5 seconds of data.
                    }
                }
            }
        }

        for (auto& old : oldLogData) {
            for (auto& t : old.tLogs) {
                if (t->locality == popLocality) {
                    for (auto& log : t->logRouters) {
                        Version prev = outstandingPops[std::make_pair(log->get().id(), tag)].first;
                        if (prev < upTo)
                            outstandingPops[std::make_pair(log->get().id(), tag)] =
                                std::make_pair(upTo, durableKnownCommittedVersion);
                        if (prev == 0)
                            popActors.add(popFromLog(this, log, tag, 0.0));
                    }
                }
            }
        }
    }

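    // Pops the transaction state store tags: a single txsTag for TLog versions before V4, otherwise one pop per
    // txs tag shard.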
    void popTxs(Version upTo, int8_t popLocality) final {
        if (getTLogVersion() < TLogVersion::V4) {
            pop(upTo, txsTag, 0, popLocality);
        } else {
            for (int i = 0; i < txsTags; i++) {
                pop(upTo, Tag(tagLocalityTxs, i), 0, popLocality);
            }
        }
    }

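    // Remote log (log router) tags are routed through popLogRouter; for other tags the pop target is recorded in
    // outstandingPops for every matching tlog and handled asynchronously by popFromLog.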
    // pop 'tag.locality' type data up to the 'upTo' version
    void pop(Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality) final {
        if (upTo <= 0)
            return;
        if (tag.locality == tagLocalityRemoteLog) {
            popLogRouter(upTo, tag, durableKnownCommittedVersion, popLocality);
            return;
        }
        for (auto& t : tLogs) {
            if (t->locality == tagLocalitySpecial || t->locality == tag.locality ||
                tag.locality == tagLocalityUpgraded ||
                (tag.locality < 0 && ((popLocality == tagLocalityInvalid) == t->isLocal))) {
                for (auto& log : t->logServers) {
                    Version prev = outstandingPops[std::make_pair(log->get().id(), tag)].first;
                    if (prev < upTo) {
                        // update pop version for popFromLog actor
                        outstandingPops[std::make_pair(log->get().id(), tag)] =
                            std::make_pair(upTo, durableKnownCommittedVersion);
                    }
                    if (prev == 0) {
                        // pop tag from log upto version defined in outstandingPops[].first
                        popActors.add(popFromLog(this, log, tag, 1.0)); //< FIXME: knob
                    }
                }
            }
        }
    }

    // pop tag from log up to the version defined in self->outstandingPops[].first
    ACTOR static Future<Void> popFromLog(TagPartitionedLogSystem* self,
                                         Reference<AsyncVar<OptionalInterface<TLogInterface>>> log,
                                         Tag tag,
                                         double time) {
        state Version last = 0;
        loop {
            wait(delay(time, TaskPriority::TLogPop));

            // to: first is the up-to version, second is durableKnownCommittedVersion
            state std::pair<Version, Version> to = self->outstandingPops[std::make_pair(log->get().id(), tag)];

            if (to.first <= last) {
                self->outstandingPops.erase(std::make_pair(log->get().id(), tag));
                return Void();
            }

            try {
                if (!log->get().present())
                    return Void();
                wait(log->get().interf().popMessages.getReply(TLogPopRequest(to.first, to.second, tag),
                                                              TaskPriority::TLogPop));

                last = to.first;
            } catch (Error& e) {
                if (e.code() == error_code_actor_cancelled)
                    throw;
                TraceEvent((e.code() == error_code_broken_promise) ? SevInfo : SevError, "LogPopError", self->dbgid)
                    .error(e)
                    .detail("Log", log->get().id());
                return Void(); // Leaving outstandingPops filled in means no further pop requests to this tlog from
                               // this logSystem
            }
        }
    }

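    // Asks a single tlog which version has been popped for "tag" by issuing a peek at version -1 and reading the
    // "popped" field of the reply; retries whenever the tlog interface changes.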
    ACTOR static Future<Version> getPoppedFromTLog(Reference<AsyncVar<OptionalInterface<TLogInterface>>> log, Tag tag) {
        loop {
            choose {
                when(TLogPeekReply rep =
                         wait(log->get().present() ? brokenPromiseToNever(log->get().interf().peekMessages.getReply(
                                                         TLogPeekRequest(-1, tag, false, false)))
                                                   : Never())) {
                    ASSERT(rep.popped.present());
                    return rep.popped.get();
                }
                when(wait(log->onChange())) {}
            }
        }
    }

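    // Collects the popped version of the txs tag from every tlog in the current and old generations, waiting at
    // most TXS_POPPED_MAX_DELAY, and returns the maximum popped version observed.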
    ACTOR static Future<Version> getPoppedTxs(TagPartitionedLogSystem* self) {
        state std::vector<std::vector<Future<Version>>> poppedFutures;
        state std::vector<Future<Void>> poppedReady;
        if (self->tLogs.size()) {
            poppedFutures.push_back(std::vector<Future<Version>>());
            for (auto& it : self->tLogs) {
                for (auto& log : it->logServers) {
                    poppedFutures.back().push_back(getPoppedFromTLog(
                        log, self->tLogs[0]->tLogVersion < TLogVersion::V4 ? txsTag : Tag(tagLocalityTxs, 0)));
                }
            }
            poppedReady.push_back(waitForAny(poppedFutures.back()));
        }

        for (auto& old : self->oldLogData) {
            if (old.tLogs.size()) {
                poppedFutures.push_back(std::vector<Future<Version>>());
                for (auto& it : old.tLogs) {
                    for (auto& log : it->logServers) {
                        poppedFutures.back().push_back(getPoppedFromTLog(
                            log, old.tLogs[0]->tLogVersion < TLogVersion::V4 ? txsTag : Tag(tagLocalityTxs, 0)));
                    }
                }
                poppedReady.push_back(waitForAny(poppedFutures.back()));
            }
        }

        state Future<Void> maxGetPoppedDuration = delay(SERVER_KNOBS->TXS_POPPED_MAX_DELAY);
        wait(waitForAll(poppedReady) || maxGetPoppedDuration);

        if (maxGetPoppedDuration.isReady()) {
            TraceEvent(SevWarnAlways, "PoppedTxsNotReady", self->dbgid);
        }

        Version maxPopped = 1;
        for (auto& it : poppedFutures) {
            for (auto& v : it) {
                if (v.isReady()) {
                    maxPopped = std::max(maxPopped, v.get());
                }
            }
        }
        return maxPopped;
    }

Future<Version> getTxsPoppedVersion() final { return getPoppedTxs(this); }
|
2019-08-06 08:01:48 +08:00
|
|
|
|
2017-10-06 08:09:44 +08:00
|
|
|
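
    // Sends confirmRunning to every present tLog in the set and returns once the responders satisfy the set's
    // replication policy; returns Never() if any tLog reports tlog_stopped, since that means the epoch has ended.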
    ACTOR static Future<Void> confirmEpochLive_internal(Reference<LogSet> logSet, Optional<UID> debugID) {
        state vector<Future<Void>> alive;
        int numPresent = 0;
        for (auto& t : logSet->logServers) {
            if (t->get().present()) {
                alive.push_back(brokenPromiseToNever(t->get().interf().confirmRunning.getReply(
                    TLogConfirmRunningRequest(debugID), TaskPriority::TLogConfirmRunningReply)));
                numPresent++;
            } else {
                alive.push_back(Never());
            }
        }

        wait(quorum(alive, std::min(logSet->tLogReplicationFactor, numPresent - logSet->tLogWriteAntiQuorum)));

        state std::vector<LocalityEntry> aliveEntries;
        state std::vector<bool> responded(alive.size(), false);
        loop {
            for (int i = 0; i < alive.size(); i++) {
                if (!responded[i] && alive[i].isReady() && !alive[i].isError()) {
                    aliveEntries.push_back(logSet->logEntryArray[i]);
                    responded[i] = true;
                }
            }

            if (logSet->satisfiesPolicy(aliveEntries)) {
                return Void();
            }

            // The current set of responders that we have wasn't enough to form a quorum, so we must
            // wait for more responses and try again.
            std::vector<Future<Void>> changes;
            for (int i = 0; i < alive.size(); i++) {
                if (!alive[i].isReady()) {
                    changes.push_back(ready(alive[i]));
                } else if (alive[i].isReady() && alive[i].isError() &&
                           alive[i].getError().code() == error_code_tlog_stopped) {
                    // All commits must go to all TLogs. If any TLog is stopped, then our epoch has ended.
                    return Never();
                }
            }
            ASSERT(changes.size() != 0);
            wait(waitForAny(changes));
        }
    }

    // Returns success after confirming that pushes in the current epoch are still possible
    Future<Void> confirmEpochLive(Optional<UID> debugID) final {
        vector<Future<Void>> quorumResults;
        for (auto& it : tLogs) {
            if (it->isLocal && it->logServers.size()) {
                quorumResults.push_back(confirmEpochLive_internal(it, debugID));
            }
        }

        return waitForAll(quorumResults);
    }
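
    // Locks every tLog in this log system so that no further commits can be accepted in the current epoch.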
    Future<Void> endEpoch() final {
        std::vector<Future<Void>> lockResults;
        for (auto& logSet : tLogs) {
            for (auto& log : logSet->logServers) {
                lockResults.push_back(success(lockTLog(dbgid, log)));
            }
        }
        return waitForAll(lockResults);
    }

    // Call only after end_epoch() has successfully completed. Returns a new epoch immediately following this one.
    // The new epoch is only provisional until the caller updates the coordinated DBCoreState.
    Future<Reference<ILogSystem>> newEpoch(RecruitFromConfigurationReply const& recr,
                                           Future<RecruitRemoteFromConfigurationReply> const& fRemoteWorkers,
                                           DatabaseConfiguration const& config,
                                           LogEpoch recoveryCount,
                                           int8_t primaryLocality,
                                           int8_t remoteLocality,
                                           std::vector<Tag> const& allTags,
                                           Reference<AsyncVar<bool>> const& recruitmentStalled) final {
        return newEpoch(Reference<TagPartitionedLogSystem>::addRef(this),
                        recr,
                        fRemoteWorkers,
                        config,
                        recoveryCount,
                        primaryLocality,
                        remoteLocality,
                        allTags,
                        recruitmentStalled);
    }
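
    // Builds a LogSystemConfig describing the current log sets and, until recovery has been durably written to
    // the core state, the old generations as well.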
    LogSystemConfig getLogSystemConfig() const final {
        LogSystemConfig logSystemConfig(epoch);
        logSystemConfig.logSystemType = logSystemType;
        logSystemConfig.expectedLogSets = expectedLogSets;
        logSystemConfig.logRouterTags = logRouterTags;
        logSystemConfig.txsTags = txsTags;
        logSystemConfig.recruitmentID = recruitmentID;
        logSystemConfig.stopped = stopped;
        logSystemConfig.recoveredAt = recoveredAt;
        logSystemConfig.pseudoLocalities = pseudoLocalities;
        logSystemConfig.oldestBackupEpoch = oldestBackupEpoch;
        for (const Reference<LogSet>& logSet : tLogs) {
            if (logSet->isLocal || remoteLogsWrittenToCoreState) {
                logSystemConfig.tLogs.emplace_back(*logSet);
            }
        }

        if (!recoveryCompleteWrittenToCoreState.get()) {
            for (const auto& oldData : oldLogData) {
                logSystemConfig.oldTLogs.emplace_back(oldData);
            }
        }
        return logSystemConfig;
    }
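
    // Collects tLog IDs and network addresses from the current generation (and, while recovery is not yet
    // durably complete, from old generations) and encodes them with logsValue().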
    Standalone<StringRef> getLogsValue() const final {
        vector<std::pair<UID, NetworkAddress>> logs;
        vector<std::pair<UID, NetworkAddress>> oldLogs;
        for (auto& t : tLogs) {
            if (t->isLocal || remoteLogsWrittenToCoreState) {
                for (int i = 0; i < t->logServers.size(); i++) {
                    logs.emplace_back(t->logServers[i]->get().id(),
                                      t->logServers[i]->get().present() ? t->logServers[i]->get().interf().address()
                                                                        : NetworkAddress());
                }
            }
        }
        if (!recoveryCompleteWrittenToCoreState.get()) {
            for (int i = 0; i < oldLogData.size(); i++) {
                for (auto& t : oldLogData[i].tLogs) {
                    for (int j = 0; j < t->logServers.size(); j++) {
                        oldLogs.emplace_back(t->logServers[j]->get().id(),
                                             t->logServers[j]->get().present()
                                                 ? t->logServers[j]->get().interf().address()
                                                 : NetworkAddress());
                    }
                }
            }
        }
        return logsValue(logs, oldLogs);
    }

    Future<Void> onLogSystemConfigChange() final {
        std::vector<Future<Void>> changes;
        changes.push_back(logSystemConfigChanged.onTrigger());
        for (auto& t : tLogs) {
            for (int i = 0; i < t->logServers.size(); i++) {
                changes.push_back(t->logServers[i]->onChange());
            }
        }
        for (int i = 0; i < oldLogData.size(); i++) {
            for (auto& t : oldLogData[i].tLogs) {
                for (int j = 0; j < t->logServers.size(); j++) {
                    changes.push_back(t->logServers[j]->onChange());
                }
            }
        }

        if (hasRemoteServers && !remoteRecovery.isReady()) {
            changes.push_back(remoteRecovery);
        }

        return waitForAny(changes);
    }

    Version getEnd() const final {
        ASSERT(recoverAt.present());
        return recoverAt.get() + 1;
    }

    Version getPeekEnd() const {
        if (recoverAt.present())
            return getEnd();
        else
            return std::numeric_limits<Version>::max();
    }

    void getPushLocations(VectorRef<Tag> tags, std::vector<int>& locations, bool allLocations) const final {
        int locationOffset = 0;
        for (auto& log : tLogs) {
            if (log->isLocal && log->logServers.size()) {
                log->getPushLocations(tags, locations, locationOffset, allLocations);
                locationOffset += log->logServers.size();
            }
        }
    }

    bool hasRemoteLogs() const final { return logRouterTags > 0 || pseudoLocalities.size() > 0; }

    Tag getRandomRouterTag() const final {
        return Tag(tagLocalityLogRouter, deterministicRandom()->randomInt(0, logRouterTags));
    }

    Tag getRandomTxsTag() const final { return Tag(tagLocalityTxs, deterministicRandom()->randomInt(0, txsTags)); }

    TLogVersion getTLogVersion() const final { return tLogs[0]->tLogVersion; }

    int getLogRouterTags() const final { return logRouterTags; }

    Version getBackupStartVersion() const final {
        ASSERT(tLogs.size() > 0);
        return backupStartVersion;
    }
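
    // Reports, for every old generation, its log router tag count and its [epochBegin, epochEnd) version range.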
    std::map<LogEpoch, ILogSystem::EpochTagsVersionsInfo> getOldEpochTagsVersionsInfo() const final {
        std::map<LogEpoch, EpochTagsVersionsInfo> epochInfos;
        for (const auto& old : oldLogData) {
            epochInfos.insert(
                { old.epoch, ILogSystem::EpochTagsVersionsInfo(old.logRouterTags, old.epochBegin, old.epochEnd) });
            TraceEvent("OldEpochTagsVersions", dbgid)
                .detail("Epoch", old.epoch)
                .detail("Tags", old.logRouterTags)
                .detail("BeginVersion", old.epochBegin)
                .detail("EndVersion", old.epochEnd);
        }
        return epochInfos;
    }
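
    // Returns the first log set recorded for the given old epoch, or an invalid reference if that epoch is not
    // present in oldLogData.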
    inline Reference<LogSet> getEpochLogSet(LogEpoch epoch) const {
        for (const auto& old : oldLogData) {
            if (epoch == old.epoch)
                return old.tLogs[0];
        }
        return Reference<LogSet>(nullptr);
    }
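
    // Attaches newly recruited backup workers to the log set of the epoch they cover (the current epoch's set
    // unless the reply names an older backupEpoch) and updates oldestBackupEpoch accordingly.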
    void setBackupWorkers(const std::vector<InitializeBackupReply>& replies) final {
        ASSERT(tLogs.size() > 0);

        Reference<LogSet> logset = tLogs[0]; // Master recruits this epoch's worker first.
        LogEpoch logsetEpoch = this->epoch;
        oldestBackupEpoch = this->epoch;
        for (const auto& reply : replies) {
            if (removedBackupWorkers.count(reply.interf.id()) > 0) {
                removedBackupWorkers.erase(reply.interf.id());
                continue;
            }
            auto worker = makeReference<AsyncVar<OptionalInterface<BackupInterface>>>(
                OptionalInterface<BackupInterface>(reply.interf));
            if (reply.backupEpoch != logsetEpoch) {
                // find the logset from oldLogData
                logsetEpoch = reply.backupEpoch;
                oldestBackupEpoch = std::min(oldestBackupEpoch, logsetEpoch);
                logset = getEpochLogSet(logsetEpoch);
                ASSERT(logset.isValid());
            }
            logset->backupWorkers.push_back(worker);
            TraceEvent("AddBackupWorker", dbgid)
                .detail("Epoch", logsetEpoch)
                .detail("BackupWorkerID", reply.interf.id());
        }
        TraceEvent("SetOldestBackupEpoch", dbgid).detail("Epoch", oldestBackupEpoch);
        backupWorkerChanged.trigger();
    }
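
    // Removes a finished backup worker from its epoch's log set and recomputes oldestBackupEpoch; if the worker
    // is not found, remembers its ID so that a later setBackupWorkers() call can skip it.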
    bool removeBackupWorker(const BackupWorkerDoneRequest& req) final {
        bool removed = false;
        Reference<LogSet> logset = getEpochLogSet(req.backupEpoch);
        if (logset.isValid()) {
            for (auto it = logset->backupWorkers.begin(); it != logset->backupWorkers.end(); it++) {
                if (it->getPtr()->get().interf().id() == req.workerUID) {
                    logset->backupWorkers.erase(it);
                    removed = true;
                    break;
                }
            }
        }

        if (removed) {
            oldestBackupEpoch = epoch;
            for (const auto& old : oldLogData) {
                if (old.epoch < oldestBackupEpoch && old.tLogs[0]->backupWorkers.size() > 0) {
                    oldestBackupEpoch = old.epoch;
                }
            }
            backupWorkerChanged.trigger();
        } else {
            removedBackupWorkers.insert(req.workerUID);
        }

        TraceEvent("RemoveBackupWorker", dbgid)
            .detail("Removed", removed)
            .detail("BackupEpoch", req.backupEpoch)
            .detail("WorkerID", req.workerUID)
            .detail("OldestBackupEpoch", oldestBackupEpoch);
        return removed;
    }

    LogEpoch getOldestBackupEpoch() const final { return oldestBackupEpoch; }

    void setOldestBackupEpoch(LogEpoch epoch) final {
        oldestBackupEpoch = epoch;
        backupWorkerChanged.trigger();
    }
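
    // Tracks the availability of one tLog: while an interface is present the failure monitor drives 'failed';
    // when the interface disappears, 'failed' is set directly.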
    ACTOR static Future<Void> monitorLog(Reference<AsyncVar<OptionalInterface<TLogInterface>>> logServer,
                                         Reference<AsyncVar<bool>> failed) {
        state Future<Void> waitFailure;
        loop {
            if (logServer->get().present())
                waitFailure = waitFailureTracker(logServer->get().interf().waitFailure, failed);
            else
                failed->set(true);
            wait(logServer->onChange());
        }
    }
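
    // Determines, from the lock replies of a single log set, a version range that can be recovered safely.
    // Illustrative example (not from the source): with N = 5 tLogs, replication factor F = 3 and anti-quorum
    // W = 0, requiredCount = N + 1 - F + W = 3, so any three lock replies are sufficient to pick a durable
    // end version.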
    Optional<std::pair<Version, Version>> static getDurableVersion(
        UID dbgid,
        LogLockInfo lockInfo,
        std::vector<Reference<AsyncVar<bool>>> failed = std::vector<Reference<AsyncVar<bool>>>(),
        Optional<Version> lastEnd = Optional<Version>()) {
        Reference<LogSet> logSet = lockInfo.logSet;
        // To ensure consistent recovery, the number of servers NOT in the write quorum plus the number of servers NOT
        // in the read quorum have to be strictly less than the replication factor. Otherwise there could be a replica
        // set consisting entirely of servers that are out of date due to not being in the write quorum or unavailable
        // due to not being in the read quorum. So with N = # of tlogs, W = antiquorum, R = required count, F =
        // replication factor, W + (N - R) < F, and optimally (N-W)+(N-R)=F-1. Thus R=N+1-F+W.
        int requiredCount =
            (int)logSet->logServers.size() + 1 - logSet->tLogReplicationFactor + logSet->tLogWriteAntiQuorum;
        ASSERT(requiredCount > 0 && requiredCount <= logSet->logServers.size());
        ASSERT(logSet->tLogReplicationFactor >= 1 && logSet->tLogReplicationFactor <= logSet->logServers.size());
        ASSERT(logSet->tLogWriteAntiQuorum >= 0 && logSet->tLogWriteAntiQuorum < logSet->logServers.size());

        std::vector<LocalityData> availableItems, badCombo;
        std::vector<TLogLockResult> results;
        std::string sServerState;
        LocalityGroup unResponsiveSet;

        for (int t = 0; t < logSet->logServers.size(); t++) {
            if (lockInfo.replies[t].isReady() && !lockInfo.replies[t].isError() &&
                (!failed.size() || !failed[t]->get())) {
                results.push_back(lockInfo.replies[t].get());
                availableItems.push_back(logSet->tLogLocalities[t]);
                sServerState += 'a';
            } else {
                unResponsiveSet.add(logSet->tLogLocalities[t]);
                sServerState += 'f';
            }
        }

        // Check if the list of results is not larger than the anti quorum
        bool bTooManyFailures = (results.size() <= logSet->tLogWriteAntiQuorum);

        // Check if failed logs complete the policy
        bTooManyFailures = bTooManyFailures || ((unResponsiveSet.size() >= logSet->tLogReplicationFactor) &&
                                                (unResponsiveSet.validate(logSet->tLogPolicy)));

        // Check all combinations of the AntiQuorum within the failed
        if (!bTooManyFailures && (logSet->tLogWriteAntiQuorum) &&
            (!validateAllCombinations(
                badCombo, unResponsiveSet, logSet->tLogPolicy, availableItems, logSet->tLogWriteAntiQuorum, false))) {
            TraceEvent("EpochEndBadCombo", dbgid)
                .detail("Required", requiredCount)
                .detail("Present", results.size())
                .detail("ServerState", sServerState);
            bTooManyFailures = true;
        }

        ASSERT(logSet->logServers.size() == lockInfo.replies.size());
        if (!bTooManyFailures) {
            std::sort(results.begin(), results.end(), sort_by_end());
            int absent = logSet->logServers.size() - results.size();
            int safe_range_begin = logSet->tLogWriteAntiQuorum;
            int new_safe_range_begin = std::min(logSet->tLogWriteAntiQuorum, (int)(results.size() - 1));
            int safe_range_end = logSet->tLogReplicationFactor - absent;

            if (!lastEnd.present() || ((safe_range_end > 0) && (safe_range_end - 1 < results.size()) &&
                                       results[safe_range_end - 1].end < lastEnd.get())) {
                Version knownCommittedVersion = 0;
                for (int i = 0; i < results.size(); i++) {
                    knownCommittedVersion = std::max(knownCommittedVersion, results[i].knownCommittedVersion);
                }

                if (knownCommittedVersion > results[new_safe_range_begin].end) {
                    knownCommittedVersion = results[new_safe_range_begin].end;
                }

                TraceEvent("GetDurableResult", dbgid)
                    .detail("Required", requiredCount)
                    .detail("Present", results.size())
                    .detail("ServerState", sServerState)
                    .detail("RecoveryVersion",
                            ((safe_range_end > 0) && (safe_range_end - 1 < results.size()))
                                ? results[safe_range_end - 1].end
                                : -1)
                    .detail("EndVersion", results[new_safe_range_begin].end)
                    .detail("SafeBegin", safe_range_begin)
                    .detail("SafeEnd", safe_range_end)
                    .detail("NewSafeBegin", new_safe_range_begin)
                    .detail("KnownCommittedVersion", knownCommittedVersion)
                    .detail("EpochEnd", lockInfo.epochEnd);

                return std::make_pair(knownCommittedVersion, results[new_safe_range_begin].end);
            }
        }
        TraceEvent("GetDurableResultWaiting", dbgid)
            .detail("Required", requiredCount)
            .detail("Present", results.size())
            .detail("ServerState", sServerState);
        return Optional<std::pair<Version, Version>>();
    }
2021-03-11 02:06:03 +08:00
|
|
|
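
    // Completes when anything that could change the result of getDurableVersion() changes: an outstanding lock
    // reply arrives, a locked tLog interface changes, or a failure flag toggles.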
    ACTOR static Future<Void> getDurableVersionChanged(
        LogLockInfo lockInfo,
        std::vector<Reference<AsyncVar<bool>>> failed = std::vector<Reference<AsyncVar<bool>>>()) {
        // Wait for anything relevant to change
        std::vector<Future<Void>> changes;
        for (int j = 0; j < lockInfo.logSet->logServers.size(); j++) {
            if (!lockInfo.replies[j].isReady())
                changes.push_back(ready(lockInfo.replies[j]));
            else {
                changes.push_back(lockInfo.logSet->logServers[j]->onChange());
                if (failed.size()) {
                    changes.push_back(failed[j]->onChange());
                }
            }
        }
        ASSERT(changes.size());
        wait(waitForAny(changes));
        return Void();
    }

    ACTOR static Future<Void> epochEnd(Reference<AsyncVar<Reference<ILogSystem>>> outLogSystem,
                                       UID dbgid,
                                       DBCoreState prevState,
                                       FutureStream<TLogRejoinRequest> rejoinRequests,
                                       LocalityData locality,
                                       bool* forceRecovery) {
        // Stops a co-quorum of tlogs so that no further versions can be committed until the DBCoreState coordination
        // state is changed. Creates a new logSystem representing the (now frozen) epoch. No other important side
        // effects.
        // The writeQuorum in the master info is from the previous configuration

        if (!prevState.tLogs.size()) {
            // This is a brand new database
            auto logSystem = makeReference<TagPartitionedLogSystem>(dbgid, locality, 0);
            logSystem->logSystemType = prevState.logSystemType;
            logSystem->recoverAt = 0;
            logSystem->knownCommittedVersion = 0;
            logSystem->stopped = true;
            outLogSystem->set(logSystem);
            wait(Future<Void>(Never()));
            throw internal_error();
        }

        if (*forceRecovery) {
            DBCoreState modifiedState = prevState;

            int8_t primaryLocality = -1;
            for (auto& coreSet : modifiedState.tLogs) {
                if (coreSet.isLocal && coreSet.locality >= 0 && coreSet.tLogLocalities[0].dcId() != locality.dcId()) {
                    primaryLocality = coreSet.locality;
                    break;
                }
            }

            bool foundRemote = false;
            int8_t remoteLocality = -1;
            int modifiedLogSets = 0;
            int removedLogSets = 0;
            if (primaryLocality >= 0) {
                auto copiedLogs = modifiedState.tLogs;
                for (auto& coreSet : copiedLogs) {
                    if (coreSet.locality != primaryLocality && coreSet.locality >= 0) {
                        foundRemote = true;
                        remoteLocality = coreSet.locality;
                        modifiedState.tLogs.clear();
                        modifiedState.tLogs.push_back(coreSet);
                        modifiedState.tLogs[0].isLocal = true;
                        modifiedState.logRouterTags = 0;
                        modifiedLogSets++;
                        break;
                    }
                }

                while (!foundRemote && modifiedState.oldTLogData.size()) {
                    for (auto& coreSet : modifiedState.oldTLogData[0].tLogs) {
                        if (coreSet.locality != primaryLocality && coreSet.locality >= tagLocalitySpecial) {
                            foundRemote = true;
                            remoteLocality = coreSet.locality;
                            modifiedState.tLogs.clear();
                            modifiedState.tLogs.push_back(coreSet);
                            modifiedState.tLogs[0].isLocal = true;
                            modifiedState.logRouterTags = 0;
                            modifiedState.txsTags = modifiedState.oldTLogData[0].txsTags;
                            modifiedLogSets++;
                            break;
                        }
                    }
                    modifiedState.oldTLogData.erase(modifiedState.oldTLogData.begin());
                    removedLogSets++;
                }

                if (foundRemote) {
                    for (int i = 0; i < modifiedState.oldTLogData.size(); i++) {
                        bool found = false;
                        auto copiedLogs = modifiedState.oldTLogData[i].tLogs;
                        for (auto& coreSet : copiedLogs) {
                            if (coreSet.locality == remoteLocality || coreSet.locality == tagLocalitySpecial) {
                                found = true;
                                if (!coreSet.isLocal || copiedLogs.size() > 1) {
                                    modifiedState.oldTLogData[i].tLogs.clear();
                                    modifiedState.oldTLogData[i].tLogs.push_back(coreSet);
                                    modifiedState.oldTLogData[i].tLogs[0].isLocal = true;
                                    modifiedState.oldTLogData[i].logRouterTags = 0;
                                    modifiedState.oldTLogData[i].epochBegin =
                                        modifiedState.oldTLogData[i].tLogs[0].startVersion;
                                    modifiedState.oldTLogData[i].epochEnd =
                                        (i == 0 ? modifiedState.tLogs[0].startVersion
                                                : modifiedState.oldTLogData[i - 1].tLogs[0].startVersion);
                                    modifiedLogSets++;
                                }
                                break;
                            }
                        }
                        if (!found) {
                            modifiedState.oldTLogData.erase(modifiedState.oldTLogData.begin() + i);
                            removedLogSets++;
                            i--;
                        }
                    }
                    prevState = modifiedState;
                } else {
                    *forceRecovery = false;
                }
            } else {
                *forceRecovery = false;
            }
            TraceEvent(SevWarnAlways, "ForcedRecovery", dbgid)
                .detail("PrimaryLocality", primaryLocality)
                .detail("RemoteLocality", remoteLocality)
                .detail("FoundRemote", foundRemote)
                .detail("Modified", modifiedLogSets)
                .detail("Removed", removedLogSets);
            for (int i = 0; i < prevState.tLogs.size(); i++) {
                TraceEvent("ForcedRecoveryTLogs", dbgid)
                    .detail("I", i)
                    .detail("Log", ::describe(prevState.tLogs[i].tLogs))
                    .detail("Loc", prevState.tLogs[i].locality)
                    .detail("Txs", prevState.txsTags);
            }
            for (int i = 0; i < prevState.oldTLogData.size(); i++) {
                for (int j = 0; j < prevState.oldTLogData[i].tLogs.size(); j++) {
                    TraceEvent("ForcedRecoveryTLogs", dbgid)
                        .detail("I", i)
                        .detail("J", j)
                        .detail("Log", ::describe(prevState.oldTLogData[i].tLogs[j].tLogs))
                        .detail("Loc", prevState.oldTLogData[i].tLogs[j].locality)
                        .detail("Txs", prevState.oldTLogData[i].txsTags);
                }
            }
        }

        TEST(true); // Master recovery from pre-existing database

        // trackRejoins listens for rejoin requests from the tLogs that we are recovering from, to learn their
        // TLogInterfaces
        state std::vector<LogLockInfo> lockResults;
        state
            std::vector<std::pair<Reference<AsyncVar<OptionalInterface<TLogInterface>>>, Reference<IReplicationPolicy>>>
                allLogServers;
        state std::vector<Reference<LogSet>> logServers;
        state std::vector<OldLogData> oldLogData;
        state std::vector<std::vector<Reference<AsyncVar<bool>>>> logFailed;
        state std::vector<Future<Void>> failureTrackers;

        for (const CoreTLogSet& coreSet : prevState.tLogs) {
            logServers.push_back(makeReference<LogSet>(coreSet));
            std::vector<Reference<AsyncVar<bool>>> failed;

            for (const auto& logVar : logServers.back()->logServers) {
                allLogServers.emplace_back(logVar, coreSet.tLogPolicy);
                failed.push_back(makeReference<AsyncVar<bool>>());
                failureTrackers.push_back(monitorLog(logVar, failed.back()));
            }
            logFailed.push_back(failed);
        }

        for (const auto& oldTlogData : prevState.oldTLogData) {
            oldLogData.emplace_back(oldTlogData);

            for (const auto& logSet : oldLogData.back().tLogs) {
                for (const auto& logVar : logSet->logServers) {
                    allLogServers.emplace_back(logVar, logSet->tLogPolicy);
                }
            }
        }
        state Future<Void> rejoins = trackRejoins(dbgid, allLogServers, rejoinRequests);

        lockResults.resize(logServers.size());
        std::set<int8_t> lockedLocalities;
        bool foundSpecial = false;
        for (int i = 0; i < logServers.size(); i++) {
            if (logServers[i]->locality == tagLocalitySpecial || logServers[i]->locality == tagLocalityUpgraded) {
                foundSpecial = true;
            }
            lockedLocalities.insert(logServers[i]->locality);
            lockResults[i].isCurrent = true;
            lockResults[i].logSet = logServers[i];
            for (int t = 0; t < logServers[i]->logServers.size(); t++) {
                lockResults[i].replies.push_back(lockTLog(dbgid, logServers[i]->logServers[t]));
            }
        }

        for (auto& old : oldLogData) {
            if (foundSpecial) {
                break;
            }
            for (auto& log : old.tLogs) {
                if (log->locality == tagLocalitySpecial || log->locality == tagLocalityUpgraded) {
                    foundSpecial = true;
                    break;
                }
                if (!lockedLocalities.count(log->locality)) {
                    TraceEvent("EpochEndLockExtra").detail("Locality", log->locality);
                    TEST(true); // locking old generations for version information
                    lockedLocalities.insert(log->locality);
                    LogLockInfo lockResult;
                    lockResult.epochEnd = old.epochEnd;
                    lockResult.logSet = log;
                    for (int t = 0; t < log->logServers.size(); t++) {
                        lockResult.replies.push_back(lockTLog(dbgid, log->logServers[t]));
                    }
                    lockResults.push_back(lockResult);
                }
            }
        }

        if (*forceRecovery) {
            state std::vector<LogLockInfo> allLockResults;
            ASSERT(lockResults.size() == 1);
            allLockResults.push_back(lockResults[0]);
            for (auto& old : oldLogData) {
                ASSERT(old.tLogs.size() == 1);
                LogLockInfo lockResult;
                lockResult.epochEnd = old.epochEnd;
                lockResult.logSet = old.tLogs[0];
                for (int t = 0; t < old.tLogs[0]->logServers.size(); t++) {
                    lockResult.replies.push_back(lockTLog(dbgid, old.tLogs[0]->logServers[t]));
                }
                allLockResults.push_back(lockResult);
            }

            state int lockNum = 0;
            state Version maxRecoveryVersion = 0;
            state int maxRecoveryIndex = 0;
            while (lockNum < allLockResults.size()) {
                auto versions = TagPartitionedLogSystem::getDurableVersion(dbgid, allLockResults[lockNum]);
                if (versions.present()) {
                    if (versions.get().second > maxRecoveryVersion) {
                        TraceEvent("HigherRecoveryVersion", dbgid)
                            .detail("Idx", lockNum)
                            .detail("Ver", versions.get().second);
                        maxRecoveryVersion = versions.get().second;
                        maxRecoveryIndex = lockNum;
                    }
                    lockNum++;
                } else {
                    wait(TagPartitionedLogSystem::getDurableVersionChanged(allLockResults[lockNum]));
                }
            }
            if (maxRecoveryIndex > 0) {
                logServers = oldLogData[maxRecoveryIndex - 1].tLogs;
                prevState.txsTags = oldLogData[maxRecoveryIndex - 1].txsTags;
                lockResults[0] = allLockResults[maxRecoveryIndex];
                lockResults[0].isCurrent = true;

                std::vector<Reference<AsyncVar<bool>>> failed;
                for (auto& log : logServers[0]->logServers) {
                    failed.push_back(makeReference<AsyncVar<bool>>());
                    failureTrackers.push_back(monitorLog(log, failed.back()));
                }
                ASSERT(logFailed.size() == 1);
                logFailed[0] = failed;
                oldLogData.erase(oldLogData.begin(), oldLogData.begin() + maxRecoveryIndex);
            }
        }

        state Optional<Version> lastEnd;
        state Version knownCommittedVersion = 0;
        loop {
            Version minEnd = std::numeric_limits<Version>::max();
            Version maxEnd = 0;
            std::vector<Future<Void>> changes;
            for (int log = 0; log < logServers.size(); log++) {
                if (!logServers[log]->isLocal) {
                    continue;
                }
                auto versions =
                    TagPartitionedLogSystem::getDurableVersion(dbgid, lockResults[log], logFailed[log], lastEnd);
                if (versions.present()) {
                    knownCommittedVersion = std::max(knownCommittedVersion, versions.get().first);
                    maxEnd = std::max(maxEnd, versions.get().second);
                    minEnd = std::min(minEnd, versions.get().second);
                }
                changes.push_back(TagPartitionedLogSystem::getDurableVersionChanged(lockResults[log], logFailed[log]));
            }

            if (maxEnd > 0 && (!lastEnd.present() || maxEnd < lastEnd.get())) {
                TEST(lastEnd.present()); // Restarting recovery at an earlier point

                auto logSystem = makeReference<TagPartitionedLogSystem>(dbgid, locality, prevState.recoveryCount);

                lastEnd = minEnd;
                logSystem->tLogs = logServers;
                logSystem->logRouterTags = prevState.logRouterTags;
                logSystem->txsTags = prevState.txsTags;
                logSystem->oldLogData = oldLogData;
                logSystem->logSystemType = prevState.logSystemType;
                logSystem->rejoins = rejoins;
                logSystem->lockResults = lockResults;
                if (knownCommittedVersion > minEnd) {
                    knownCommittedVersion = minEnd;
                }
                logSystem->recoverAt = minEnd;
                logSystem->knownCommittedVersion = knownCommittedVersion;
                TraceEvent(SevDebug, "FinalRecoveryVersionInfo")
                    .detail("KCV", knownCommittedVersion)
                    .detail("MinEnd", minEnd);
                logSystem->remoteLogsWrittenToCoreState = true;
                logSystem->stopped = true;
                logSystem->pseudoLocalities = prevState.pseudoLocalities;

                outLogSystem->set(logSystem);
            }

            wait(waitForAny(changes));
        }
    }
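
    // Recruits log routers for the given locality across the current and all older generations, starting each
    // router at the appropriate start version; when forRemote is false the actor also monitors the recruited
    // routers for failure and never returns normally.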
    ACTOR static Future<Void> recruitOldLogRouters(TagPartitionedLogSystem* self,
                                                   vector<WorkerInterface> workers,
                                                   LogEpoch recoveryCount,
                                                   int8_t locality,
                                                   Version startVersion,
                                                   std::vector<LocalityData> tLogLocalities,
                                                   Reference<IReplicationPolicy> tLogPolicy,
                                                   bool forRemote) {
        state vector<vector<Future<TLogInterface>>> logRouterInitializationReplies;
        state vector<Future<TLogInterface>> allReplies;
        int nextRouter = 0;
        state Version lastStart = std::numeric_limits<Version>::max();

        if (!forRemote) {
            Version maxStart = getMaxLocalStartVersion(self->tLogs);

            lastStart = std::max(startVersion, maxStart);
            if (self->logRouterTags == 0) {
                ASSERT_WE_THINK(false);
                self->logSystemConfigChanged.trigger();
                return Void();
            }

            bool found = false;
            for (auto& tLogs : self->tLogs) {
                if (tLogs->locality == locality) {
                    found = true;
                }

                tLogs->logRouters.clear();
            }

            if (!found) {
                TraceEvent("RecruitingOldLogRoutersAddingLocality")
                    .detail("Locality", locality)
                    .detail("LastStart", lastStart);
                auto newLogSet = makeReference<LogSet>();
                newLogSet->locality = locality;
                newLogSet->startVersion = lastStart;
                newLogSet->isLocal = false;
                self->tLogs.push_back(newLogSet);
            }

            for (auto& tLogs : self->tLogs) {
                // Recruit log routers for old generations of the primary locality
                if (tLogs->locality == locality) {
                    logRouterInitializationReplies.emplace_back();
                    for (int i = 0; i < self->logRouterTags; i++) {
                        InitializeLogRouterRequest req;
                        req.recoveryCount = recoveryCount;
                        req.routerTag = Tag(tagLocalityLogRouter, i);
                        req.startVersion = lastStart;
                        req.tLogLocalities = tLogLocalities;
                        req.tLogPolicy = tLogPolicy;
                        req.locality = locality;
                        auto reply = transformErrors(
                            throwErrorOr(workers[nextRouter].logRouter.getReplyUnlessFailedFor(
                                req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
                            master_recovery_failed());
                        logRouterInitializationReplies.back().push_back(reply);
                        allReplies.push_back(reply);
                        nextRouter = (nextRouter + 1) % workers.size();
                    }
                }
            }
        }

        for (auto& old : self->oldLogData) {
            Version maxStart = getMaxLocalStartVersion(old.tLogs);

            if (old.logRouterTags == 0 || maxStart >= lastStart) {
                break;
            }
            lastStart = std::max(startVersion, maxStart);
            bool found = false;
            for (auto& tLogs : old.tLogs) {
                if (tLogs->locality == locality) {
                    found = true;
                }
                tLogs->logRouters.clear();
            }

            if (!found) {
                TraceEvent("RecruitingOldLogRoutersAddingLocality")
                    .detail("Locality", locality)
                    .detail("LastStart", lastStart);
                auto newLogSet = makeReference<LogSet>();
                newLogSet->locality = locality;
                newLogSet->startVersion = lastStart;
                old.tLogs.push_back(newLogSet);
            }

            for (auto& tLogs : old.tLogs) {
                // Recruit log routers for old generations of the primary locality
                if (tLogs->locality == locality) {
                    logRouterInitializationReplies.emplace_back();
                    for (int i = 0; i < old.logRouterTags; i++) {
                        InitializeLogRouterRequest req;
                        req.recoveryCount = recoveryCount;
                        req.routerTag = Tag(tagLocalityLogRouter, i);
                        req.startVersion = lastStart;
                        req.tLogLocalities = tLogLocalities;
                        req.tLogPolicy = tLogPolicy;
                        req.locality = locality;
                        auto reply = transformErrors(
                            throwErrorOr(workers[nextRouter].logRouter.getReplyUnlessFailedFor(
                                req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
                            master_recovery_failed());
                        logRouterInitializationReplies.back().push_back(reply);
                        allReplies.push_back(reply);
                        nextRouter = (nextRouter + 1) % workers.size();
                    }
                }
            }
        }

        wait(waitForAll(allReplies));

        int nextReplies = 0;
        lastStart = std::numeric_limits<Version>::max();
        vector<Future<Void>> failed;

        if (!forRemote) {
            Version maxStart = getMaxLocalStartVersion(self->tLogs);

            lastStart = std::max(startVersion, maxStart);
            for (auto& tLogs : self->tLogs) {
                if (tLogs->locality == locality) {
                    for (int i = 0; i < logRouterInitializationReplies[nextReplies].size(); i++) {
                        tLogs->logRouters.push_back(makeReference<AsyncVar<OptionalInterface<TLogInterface>>>(
                            OptionalInterface<TLogInterface>(logRouterInitializationReplies[nextReplies][i].get())));
                        failed.push_back(waitFailureClient(
                            logRouterInitializationReplies[nextReplies][i].get().waitFailure,
                            SERVER_KNOBS->TLOG_TIMEOUT,
                            -SERVER_KNOBS->TLOG_TIMEOUT / SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY,
                            /*trace=*/true));
                    }
                    nextReplies++;
                }
            }
        }

        for (auto& old : self->oldLogData) {
            Version maxStart = getMaxLocalStartVersion(old.tLogs);
            if (old.logRouterTags == 0 || maxStart >= lastStart) {
                break;
            }
            lastStart = std::max(startVersion, maxStart);
            for (auto& tLogs : old.tLogs) {
                if (tLogs->locality == locality) {
                    for (int i = 0; i < logRouterInitializationReplies[nextReplies].size(); i++) {
                        tLogs->logRouters.push_back(makeReference<AsyncVar<OptionalInterface<TLogInterface>>>(
                            OptionalInterface<TLogInterface>(logRouterInitializationReplies[nextReplies][i].get())));
                        if (!forRemote) {
                            failed.push_back(waitFailureClient(
                                logRouterInitializationReplies[nextReplies][i].get().waitFailure,
                                SERVER_KNOBS->TLOG_TIMEOUT,
                                -SERVER_KNOBS->TLOG_TIMEOUT / SERVER_KNOBS->SECONDS_BEFORE_NO_FAILURE_DELAY,
                                /*trace=*/true));
                        }
                    }
                    nextReplies++;
                }
            }
        }

        if (!forRemote) {
            self->logSystemConfigChanged.trigger();
            wait(failed.size() ? tagError<Void>(quorum(failed, 1), master_tlog_failed()) : Future<Void>(Never()));
            throw internal_error();
        }
        return Void();
    }
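
    // Helpers: getMaxLocalStartVersion() returns the largest startVersion among the local log sets, and
    // getLocalTags() filters allTags down to the tags relevant to the given locality.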
    static Version getMaxLocalStartVersion(const std::vector<Reference<LogSet>>& tLogs) {
        Version maxStart = 0;
        for (const auto& logSet : tLogs) {
            if (logSet->isLocal) {
                maxStart = std::max(maxStart, logSet->startVersion);
            }
        }
        return maxStart;
    }

    static std::vector<Tag> getLocalTags(int8_t locality, const std::vector<Tag>& allTags) {
        std::vector<Tag> localTags;
        for (const auto& tag : allTags) {
            if (locality == tagLocalitySpecial || locality == tag.locality || tag.locality < 0) {
                localTags.push_back(tag);
            }
        }
        return localTags;
    }
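
    // Builds the remote side of a new epoch: waits for the recruited remote workers, derives the remote log
    // set's start version from the old generation's durable versions, recruits log routers and remote tLogs,
    // and wires the resulting interfaces into this log system.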
    ACTOR static Future<Void> newRemoteEpoch(TagPartitionedLogSystem* self,
                                             Reference<TagPartitionedLogSystem> oldLogSystem,
                                             Future<RecruitRemoteFromConfigurationReply> fRemoteWorkers,
                                             DatabaseConfiguration configuration,
                                             LogEpoch recoveryCount,
                                             int8_t remoteLocality,
                                             std::vector<Tag> allTags) {
        TraceEvent("RemoteLogRecruitment_WaitingForWorkers");
        state RecruitRemoteFromConfigurationReply remoteWorkers = wait(fRemoteWorkers);

        state Reference<LogSet> logSet(new LogSet());
        logSet->tLogReplicationFactor = configuration.getRemoteTLogReplicationFactor();
        logSet->tLogVersion = configuration.tLogVersion;
        logSet->tLogPolicy = configuration.getRemoteTLogPolicy();
        logSet->isLocal = false;
        logSet->locality = remoteLocality;

        logSet->startVersion = oldLogSystem->knownCommittedVersion + 1;
        state int lockNum = 0;
        while (lockNum < oldLogSystem->lockResults.size()) {
            if (oldLogSystem->lockResults[lockNum].logSet->locality == remoteLocality) {
                loop {
                    auto versions =
                        TagPartitionedLogSystem::getDurableVersion(self->dbgid, oldLogSystem->lockResults[lockNum]);
                    if (versions.present()) {
                        logSet->startVersion =
                            std::min(std::min(versions.get().first + 1, oldLogSystem->lockResults[lockNum].epochEnd),
                                     logSet->startVersion);
                        break;
                    }
                    wait(TagPartitionedLogSystem::getDurableVersionChanged(oldLogSystem->lockResults[lockNum]));
                }
                break;
            }
            lockNum++;
        }

        vector<LocalityData> localities;
        localities.resize(remoteWorkers.remoteTLogs.size());
        for (int i = 0; i < remoteWorkers.remoteTLogs.size(); i++) {
            localities[i] = remoteWorkers.remoteTLogs[i].locality;
        }

        state Future<Void> oldRouterRecruitment = Void();
        if (logSet->startVersion < oldLogSystem->knownCommittedVersion + 1) {
            ASSERT(oldLogSystem->logRouterTags > 0);
            oldRouterRecruitment = TagPartitionedLogSystem::recruitOldLogRouters(self,
                                                                                 remoteWorkers.logRouters,
                                                                                 recoveryCount,
                                                                                 remoteLocality,
                                                                                 logSet->startVersion,
                                                                                 localities,
                                                                                 logSet->tLogPolicy,
                                                                                 true);
        }

        state vector<Future<TLogInterface>> logRouterInitializationReplies;
        const Version startVersion = oldLogSystem->logRouterTags == 0
                                         ? oldLogSystem->recoverAt.get() + 1
                                         : std::max(self->tLogs[0]->startVersion, logSet->startVersion);
        for (int i = 0; i < self->logRouterTags; i++) {
            InitializeLogRouterRequest req;
            req.recoveryCount = recoveryCount;
            req.routerTag = Tag(tagLocalityLogRouter, i);
            req.startVersion = startVersion;
            req.tLogLocalities = localities;
            req.tLogPolicy = logSet->tLogPolicy;
            req.locality = remoteLocality;
            logRouterInitializationReplies.push_back(transformErrors(
                throwErrorOr(
                    remoteWorkers.logRouters[i % remoteWorkers.logRouters.size()].logRouter.getReplyUnlessFailedFor(
                        req, SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
                master_recovery_failed()));
        }

        std::vector<Tag> localTags = getLocalTags(remoteLocality, allTags);
        LogSystemConfig oldLogSystemConfig = oldLogSystem->getLogSystemConfig();

        logSet->tLogLocalities.resize(remoteWorkers.remoteTLogs.size());
        logSet->logServers.resize(
            remoteWorkers.remoteTLogs
                .size()); // Dummy interfaces, so that logSystem->getPushLocations() below uses the correct size
        logSet->updateLocalitySet(localities);

        state vector<Future<TLogInterface>> remoteTLogInitializationReplies;
        vector<InitializeTLogRequest> remoteTLogReqs(remoteWorkers.remoteTLogs.size());

        bool nonShardedTxs = self->getTLogVersion() < TLogVersion::V4;
        if (oldLogSystem->logRouterTags == 0) {
            std::vector<int> locations;
            for (Tag tag : localTags) {
                locations.clear();
                logSet->getPushLocations(VectorRef<Tag>(&tag, 1), locations, 0);
                for (int loc : locations)
                    remoteTLogReqs[loc].recoverTags.push_back(tag);
            }

            if (oldLogSystem->tLogs.size()) {
                int maxTxsTags = oldLogSystem->txsTags;
                bool needsOldTxs = oldLogSystem->tLogs[0]->tLogVersion < TLogVersion::V4;
                for (auto& it : oldLogSystem->oldLogData) {
                    maxTxsTags = std::max<int>(maxTxsTags, it.txsTags);
                    needsOldTxs = needsOldTxs || it.tLogs[0]->tLogVersion < TLogVersion::V4;
                }
                for (int i = needsOldTxs ? -1 : 0; i < maxTxsTags; i++) {
                    Tag tag = i == -1 ? txsTag : Tag(tagLocalityTxs, i);
                    Tag pushTag = (i == -1 || nonShardedTxs) ? txsTag : Tag(tagLocalityTxs, i % self->txsTags);
                    locations.clear();
                    logSet->getPushLocations(VectorRef<Tag>(&pushTag, 1), locations, 0);
                    for (int loc : locations)
                        remoteTLogReqs[loc].recoverTags.push_back(tag);
                }
            }
        }

        if (oldLogSystem->tLogs.size()) {
            if (nonShardedTxs) {
                localTags.push_back(txsTag);
            } else {
                for (int i = 0; i < self->txsTags; i++) {
                    localTags.push_back(Tag(tagLocalityTxs, i));
                }
            }
        }

        for (int i = 0; i < remoteWorkers.remoteTLogs.size(); i++) {
            InitializeTLogRequest& req = remoteTLogReqs[i];
            req.recruitmentID = self->recruitmentID;
            req.logVersion = configuration.tLogVersion;
            req.storeType = configuration.tLogDataStoreType;
            req.spillType = configuration.tLogSpillType;
            req.recoverFrom = oldLogSystemConfig;
            req.recoverAt = oldLogSystem->recoverAt.get();
            req.knownCommittedVersion = oldLogSystem->knownCommittedVersion;
            req.epoch = recoveryCount;
            req.remoteTag = Tag(tagLocalityRemoteLog, i);
            req.locality = remoteLocality;
            req.isPrimary = false;
            req.allTags = localTags;
            req.startVersion = logSet->startVersion;
            req.logRouterTags = 0;
            req.txsTags = self->txsTags;
        }

        remoteTLogInitializationReplies.reserve(remoteWorkers.remoteTLogs.size());
        for (int i = 0; i < remoteWorkers.remoteTLogs.size(); i++)
            remoteTLogInitializationReplies.push_back(transformErrors(
                throwErrorOr(remoteWorkers.remoteTLogs[i].tLog.getReplyUnlessFailedFor(
                    remoteTLogReqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
                master_recovery_failed()));

        TraceEvent("RemoteLogRecruitment_InitializingRemoteLogs")
            .detail("StartVersion", logSet->startVersion)
            .detail("LocalStart", self->tLogs[0]->startVersion)
            .detail("LogRouterTags", self->logRouterTags);
        wait(waitForAll(remoteTLogInitializationReplies) && waitForAll(logRouterInitializationReplies) &&
             oldRouterRecruitment);

        for (int i = 0; i < logRouterInitializationReplies.size(); i++) {
            logSet->logRouters.push_back(makeReference<AsyncVar<OptionalInterface<TLogInterface>>>(
                OptionalInterface<TLogInterface>(logRouterInitializationReplies[i].get())));
        }

        for (int i = 0; i < remoteTLogInitializationReplies.size(); i++) {
            logSet->logServers[i] = makeReference<AsyncVar<OptionalInterface<TLogInterface>>>(
                OptionalInterface<TLogInterface>(remoteTLogInitializationReplies[i].get()));
            logSet->tLogLocalities[i] = remoteWorkers.remoteTLogs[i].locality;
        }
        filterLocalityDataForPolicy(logSet->tLogPolicy, &logSet->tLogLocalities);

        std::vector<Future<Void>> recoveryComplete;
        recoveryComplete.reserve(logSet->logServers.size());
        for (int i = 0; i < logSet->logServers.size(); i++)
            recoveryComplete.push_back(transformErrors(
|
|
|
|
throwErrorOr(logSet->logServers[i]->get().interf().recoveryFinished.getReplyUnlessFailedFor(
|
|
|
|
TLogRecoveryFinishedRequest(),
|
|
|
|
SERVER_KNOBS->TLOG_TIMEOUT,
|
|
|
|
SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
|
|
|
|
master_recovery_failed()));
|
2018-06-20 13:16:45 +08:00
|
|
|
|
2017-07-14 03:29:21 +08:00
|
|
|
self->remoteRecoveryComplete = waitForAll(recoveryComplete);
|
2021-03-11 02:06:03 +08:00
|
|
|
self->tLogs.push_back(logSet);
|
2018-01-06 06:15:25 +08:00
|
|
|
TraceEvent("RemoteLogRecruitment_CompletingRecovery");
|
2017-07-10 05:46:16 +08:00
|
|
|
return Void();
|
|
|
|
}
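
    // newEpoch recruits the primary (and, when configured, satellite) tlogs for the next generation, carries
    // the previous generation forward as oldLogData, and starts remote recruitment when usableRegions > 1.
    // The returned log system is not yet recovered; it carries the recoveryComplete / remoteRecovery futures
    // the caller is expected to wait on.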
    ACTOR static Future<Reference<ILogSystem>> newEpoch(Reference<TagPartitionedLogSystem> oldLogSystem,
                                                        RecruitFromConfigurationReply recr,
                                                        Future<RecruitRemoteFromConfigurationReply> fRemoteWorkers,
                                                        DatabaseConfiguration configuration,
                                                        LogEpoch recoveryCount,
                                                        int8_t primaryLocality,
                                                        int8_t remoteLocality,
                                                        std::vector<Tag> allTags,
                                                        Reference<AsyncVar<bool>> recruitmentStalled) {
        state double startTime = now();
        state Reference<TagPartitionedLogSystem> logSystem(
            new TagPartitionedLogSystem(oldLogSystem->getDebugID(), oldLogSystem->locality, recoveryCount));
        logSystem->logSystemType = LogSystemType::tagPartitioned;
        logSystem->expectedLogSets = 1;
        logSystem->recoveredAt = oldLogSystem->recoverAt;
        logSystem->repopulateRegionAntiQuorum = configuration.repopulateRegionAntiQuorum;
        logSystem->recruitmentID = deterministicRandom()->randomUniqueID();
        logSystem->txsTags = configuration.tLogVersion >= TLogVersion::V4 ? recr.tLogs.size() : 0;
        oldLogSystem->recruitmentID = logSystem->recruitmentID;

        if (configuration.usableRegions > 1) {
            logSystem->logRouterTags =
                recr.tLogs.size() *
                std::max<int>(1, configuration.desiredLogRouterCount / std::max<int>(1, recr.tLogs.size()));
            logSystem->expectedLogSets++;
            logSystem->addPseudoLocality(tagLocalityLogRouterMapped);
            TraceEvent e("AddPseudoLocality", logSystem->getDebugID());
            e.detail("Locality1", "LogRouterMapped");
            if (configuration.backupWorkerEnabled) {
                logSystem->addPseudoLocality(tagLocalityBackup);
                e.detail("Locality2", "Backup");
            }
        } else if (configuration.backupWorkerEnabled) {
            // Single region uses log router tag for backup workers.
            logSystem->logRouterTags =
                recr.tLogs.size() *
                std::max<int>(1, configuration.desiredLogRouterCount / std::max<int>(1, recr.tLogs.size()));
            logSystem->addPseudoLocality(tagLocalityBackup);
            TraceEvent("AddPseudoLocality", logSystem->getDebugID()).detail("Locality", "Backup");
        }
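
        // The formula above gives each primary tlog a whole number of router tags, never fewer than one per
        // tlog: e.g. (assumed figures) with 4 primary tlogs and desiredLogRouterCount = 10 it yields
        // 4 * max(1, 10 / 4) = 8 router tags.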

        logSystem->tLogs.push_back(makeReference<LogSet>());
        logSystem->tLogs[0]->tLogVersion = configuration.tLogVersion;
        logSystem->tLogs[0]->tLogWriteAntiQuorum = configuration.tLogWriteAntiQuorum;
        logSystem->tLogs[0]->tLogReplicationFactor = configuration.tLogReplicationFactor;
        logSystem->tLogs[0]->tLogPolicy = configuration.tLogPolicy;
        logSystem->tLogs[0]->isLocal = true;
        logSystem->tLogs[0]->locality = primaryLocality;

        state RegionInfo region = configuration.getRegion(recr.dcId);

        state int maxTxsTags = oldLogSystem->txsTags;
        state bool needsOldTxs = oldLogSystem->tLogs.size() && oldLogSystem->getTLogVersion() < TLogVersion::V4;
        for (auto& it : oldLogSystem->oldLogData) {
            maxTxsTags = std::max<int>(maxTxsTags, it.txsTags);
            needsOldTxs = needsOldTxs || it.tLogs[0]->tLogVersion < TLogVersion::V4;
        }

        if (region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) {
            logSystem->tLogs.push_back(makeReference<LogSet>());
            if (recr.satelliteFallback) {
                logSystem->tLogs[1]->tLogWriteAntiQuorum = region.satelliteTLogWriteAntiQuorumFallback;
                logSystem->tLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactorFallback;
                logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicyFallback;
            } else {
                logSystem->tLogs[1]->tLogWriteAntiQuorum = region.satelliteTLogWriteAntiQuorum;
                logSystem->tLogs[1]->tLogReplicationFactor = region.satelliteTLogReplicationFactor;
                logSystem->tLogs[1]->tLogPolicy = region.satelliteTLogPolicy;
            }
            logSystem->tLogs[1]->isLocal = true;
            logSystem->tLogs[1]->locality = tagLocalitySatellite;
            logSystem->tLogs[1]->tLogVersion = configuration.tLogVersion;
            logSystem->tLogs[1]->startVersion = oldLogSystem->knownCommittedVersion + 1;

            logSystem->tLogs[1]->tLogLocalities.resize(recr.satelliteTLogs.size());
            for (int i = 0; i < recr.satelliteTLogs.size(); i++) {
                logSystem->tLogs[1]->tLogLocalities[i] = recr.satelliteTLogs[i].locality;
            }
            filterLocalityDataForPolicy(logSystem->tLogs[1]->tLogPolicy, &logSystem->tLogs[1]->tLogLocalities);

            // Dummy interfaces, so that logSystem->getPushLocations() below uses the correct size
            logSystem->tLogs[1]->logServers.resize(recr.satelliteTLogs.size());
            logSystem->tLogs[1]->updateLocalitySet(logSystem->tLogs[1]->tLogLocalities);
            logSystem->tLogs[1]->populateSatelliteTagLocations(
                logSystem->logRouterTags, oldLogSystem->logRouterTags, logSystem->txsTags, maxTxsTags);
            logSystem->expectedLogSets++;
        }
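
        // Carry the previous generation forward as oldLogData[0] (newer generations first), then append the
        // older generations behind it, so that peeks for versions before this epoch's start can still be
        // served from the retired log sets.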
        if (oldLogSystem->tLogs.size()) {
            logSystem->oldLogData.emplace_back();
            logSystem->oldLogData[0].tLogs = oldLogSystem->tLogs;
            logSystem->oldLogData[0].epochBegin = oldLogSystem->tLogs[0]->startVersion;
            logSystem->oldLogData[0].epochEnd = oldLogSystem->knownCommittedVersion + 1;
            logSystem->oldLogData[0].logRouterTags = oldLogSystem->logRouterTags;
            logSystem->oldLogData[0].txsTags = oldLogSystem->txsTags;
            logSystem->oldLogData[0].pseudoLocalities = oldLogSystem->pseudoLocalities;
            logSystem->oldLogData[0].epoch = oldLogSystem->epoch;
        }
        logSystem->oldLogData.insert(
            logSystem->oldLogData.end(), oldLogSystem->oldLogData.begin(), oldLogSystem->oldLogData.end());

        logSystem->tLogs[0]->startVersion = oldLogSystem->knownCommittedVersion + 1;
        logSystem->backupStartVersion = oldLogSystem->knownCommittedVersion + 1;
        state int lockNum = 0;
        while (lockNum < oldLogSystem->lockResults.size()) {
            if (oldLogSystem->lockResults[lockNum].logSet->locality == primaryLocality) {
                if (oldLogSystem->lockResults[lockNum].isCurrent &&
                    oldLogSystem->lockResults[lockNum].logSet->isLocal) {
                    break;
                }
                state Future<Void> stalledAfter = setAfter(recruitmentStalled, SERVER_KNOBS->MAX_RECOVERY_TIME, true);
                loop {
                    auto versions = TagPartitionedLogSystem::getDurableVersion(logSystem->dbgid,
                                                                               oldLogSystem->lockResults[lockNum]);
                    if (versions.present()) {
                        logSystem->tLogs[0]->startVersion =
                            std::min(std::min(versions.get().first + 1, oldLogSystem->lockResults[lockNum].epochEnd),
                                     logSystem->tLogs[0]->startVersion);
                        break;
                    }
                    wait(TagPartitionedLogSystem::getDurableVersionChanged(oldLogSystem->lockResults[lockNum]));
                }
                stalledAfter.cancel();
                break;
            }
            lockNum++;
        }
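
        // At this point tLogs[0]->startVersion is knownCommittedVersion + 1 unless the locked previous
        // generation's durable version was lower, in which case it has been pulled back so the missing
        // range can be filled by the old log routers recruited below.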

        vector<LocalityData> localities;
        localities.resize(recr.tLogs.size());
        for (int i = 0; i < recr.tLogs.size(); i++) {
            localities[i] = recr.tLogs[i].locality;
        }

        state Future<Void> oldRouterRecruitment = Never();
        TraceEvent("NewEpochStartVersion", oldLogSystem->getDebugID())
            .detail("StartVersion", logSystem->tLogs[0]->startVersion)
            .detail("EpochEnd", oldLogSystem->knownCommittedVersion + 1)
            .detail("Locality", primaryLocality)
            .detail("OldLogRouterTags", oldLogSystem->logRouterTags);
        if (oldLogSystem->logRouterTags > 0 ||
            logSystem->tLogs[0]->startVersion < oldLogSystem->knownCommittedVersion + 1) {
            oldRouterRecruitment = TagPartitionedLogSystem::recruitOldLogRouters(oldLogSystem.getPtr(),
                                                                                 recr.oldLogRouters,
                                                                                 recoveryCount,
                                                                                 primaryLocality,
                                                                                 logSystem->tLogs[0]->startVersion,
                                                                                 localities,
                                                                                 logSystem->tLogs[0]->tLogPolicy,
                                                                                 false);
            if (oldLogSystem->knownCommittedVersion - logSystem->tLogs[0]->startVersion >
                SERVER_KNOBS->MAX_RECOVERY_VERSIONS) {
                // Make sure we can recover in the other DC.
                for (auto& lockResult : oldLogSystem->lockResults) {
                    if (lockResult.logSet->locality == remoteLocality) {
                        if (TagPartitionedLogSystem::getDurableVersion(logSystem->dbgid, lockResult).present()) {
                            recruitmentStalled->set(true);
                        }
                    }
                }
            }
        } else {
            oldLogSystem->logSystemConfigChanged.trigger();
        }
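
        // Decide which tags each newly recruited primary tlog must recover from the previous generation:
        // its local tags, any old log router tags, and the txs tags handled below.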
        std::vector<Tag> localTags = getLocalTags(primaryLocality, allTags);
        state LogSystemConfig oldLogSystemConfig = oldLogSystem->getLogSystemConfig();

        state vector<Future<TLogInterface>> initializationReplies;
        vector<InitializeTLogRequest> reqs(recr.tLogs.size());

        logSystem->tLogs[0]->tLogLocalities.resize(recr.tLogs.size());
        // Dummy interfaces, so that logSystem->getPushLocations() below uses the correct size
        logSystem->tLogs[0]->logServers.resize(recr.tLogs.size());
        logSystem->tLogs[0]->updateLocalitySet(localities);

        std::vector<int> locations;
        for (Tag tag : localTags) {
            locations.clear();
            logSystem->tLogs[0]->getPushLocations(VectorRef<Tag>(&tag, 1), locations, 0);
            for (int loc : locations)
                reqs[loc].recoverTags.push_back(tag);
        }
        for (int i = 0; i < oldLogSystem->logRouterTags; i++) {
            Tag tag = Tag(tagLocalityLogRouter, i);
            reqs[logSystem->tLogs[0]->bestLocationFor(tag)].recoverTags.push_back(tag);
        }
        bool nonShardedTxs = logSystem->getTLogVersion() < TLogVersion::V4;
        if (oldLogSystem->tLogs.size()) {
            for (int i = needsOldTxs ? -1 : 0; i < maxTxsTags; i++) {
                Tag tag = i == -1 ? txsTag : Tag(tagLocalityTxs, i);
                Tag pushTag = (i == -1 || nonShardedTxs) ? txsTag : Tag(tagLocalityTxs, i % logSystem->txsTags);
                locations.clear();
                logSystem->tLogs[0]->getPushLocations(VectorRef<Tag>(&pushTag, 1), locations, 0);
                for (int loc : locations)
                    reqs[loc].recoverTags.push_back(tag);
            }
            if (nonShardedTxs) {
                localTags.push_back(txsTag);
            } else {
                for (int i = 0; i < logSystem->txsTags; i++) {
                    localTags.push_back(Tag(tagLocalityTxs, i));
                }
            }
        }
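
        // Example of the txs mapping above (assumed figures): recovering 8 old txs tags onto a generation
        // with txsTags = 4, old tag (txs, 6) is recovered by the tlogs that are the push locations for
        // (txs, 6 % 4) = (txs, 2); with nonShardedTxs the single txsTag is used instead.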

        for (int i = 0; i < recr.tLogs.size(); i++) {
            InitializeTLogRequest& req = reqs[i];
            req.recruitmentID = logSystem->recruitmentID;
            req.logVersion = configuration.tLogVersion;
            req.storeType = configuration.tLogDataStoreType;
            req.spillType = configuration.tLogSpillType;
            req.recoverFrom = oldLogSystemConfig;
            req.recoverAt = oldLogSystem->recoverAt.get();
            req.knownCommittedVersion = oldLogSystem->knownCommittedVersion;
            req.epoch = recoveryCount;
            req.locality = primaryLocality;
            req.remoteTag = Tag(tagLocalityRemoteLog, i);
            req.isPrimary = true;
            req.allTags = localTags;
            req.startVersion = logSystem->tLogs[0]->startVersion;
            req.logRouterTags = logSystem->logRouterTags;
            req.txsTags = logSystem->txsTags;
        }

        initializationReplies.reserve(recr.tLogs.size());
        for (int i = 0; i < recr.tLogs.size(); i++)
            initializationReplies.push_back(transformErrors(
                throwErrorOr(recr.tLogs[i].tLog.getReplyUnlessFailedFor(
                    reqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
                master_recovery_failed()));
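
        // recoveryComplete collects the recoveryFinished futures of every tlog recruited in this epoch. When
        // a satellite region is configured, the satellite tlogs are recruited next: build the tags they must
        // recover, send their InitializeTLogRequests, and wait for them alongside old router recruitment.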
        state std::vector<Future<Void>> recoveryComplete;

        if (region.satelliteTLogReplicationFactor > 0 && configuration.usableRegions > 1) {
            state vector<Future<TLogInterface>> satelliteInitializationReplies;
            vector<InitializeTLogRequest> sreqs(recr.satelliteTLogs.size());
            std::vector<Tag> satelliteTags;

            if (logSystem->logRouterTags) {
                for (int i = 0; i < oldLogSystem->logRouterTags; i++) {
                    Tag tag = Tag(tagLocalityLogRouter, i);
                    // Satellite logs index a mutation tagged tagLocalityLogRouter with an id greater than the
                    // number of log routers as if its id were id mod the number of log routers. We thus need
                    // to make sure that, when going from more log routers in the previous generation to fewer
                    // in the new one, we map the old log router tags onto satellites that are the preferred
                    // location for id % logRouterTags.
                    Tag pushLocation = Tag(tagLocalityLogRouter, i % logSystem->logRouterTags);
                    locations.clear();
                    logSystem->tLogs[1]->getPushLocations(VectorRef<Tag>(&pushLocation, 1), locations, 0);
                    for (int loc : locations)
                        sreqs[loc].recoverTags.push_back(tag);
                }
            }
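
            // Example of the mapping above (assumed figures): going from 8 log routers to 4, old tag
            // (router, 6) is recovered by the satellites that are the preferred location for (router, 6 % 4).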

            if (oldLogSystem->tLogs.size()) {
                for (int i = needsOldTxs ? -1 : 0; i < maxTxsTags; i++) {
                    Tag tag = i == -1 ? txsTag : Tag(tagLocalityTxs, i);
                    Tag pushTag = (i == -1 || nonShardedTxs) ? txsTag : Tag(tagLocalityTxs, i % logSystem->txsTags);
                    locations.clear();
                    logSystem->tLogs[1]->getPushLocations(VectorRef<Tag>(&pushTag, 1), locations, 0);
                    for (int loc : locations)
                        sreqs[loc].recoverTags.push_back(tag);
                }
                if (nonShardedTxs) {
                    satelliteTags.push_back(txsTag);
                } else {
                    for (int i = 0; i < logSystem->txsTags; i++) {
                        satelliteTags.push_back(Tag(tagLocalityTxs, i));
                    }
                }
            }

            for (int i = 0; i < recr.satelliteTLogs.size(); i++) {
                InitializeTLogRequest& req = sreqs[i];
                req.recruitmentID = logSystem->recruitmentID;
                req.logVersion = configuration.tLogVersion;
                req.storeType = configuration.tLogDataStoreType;
                req.spillType = configuration.tLogSpillType;
                req.recoverFrom = oldLogSystemConfig;
                req.recoverAt = oldLogSystem->recoverAt.get();
                req.knownCommittedVersion = oldLogSystem->knownCommittedVersion;
                req.epoch = recoveryCount;
                req.locality = tagLocalitySatellite;
                req.remoteTag = Tag();
                req.isPrimary = true;
                req.allTags = satelliteTags;
                req.startVersion = oldLogSystem->knownCommittedVersion + 1;
                req.logRouterTags = logSystem->logRouterTags;
                req.txsTags = logSystem->txsTags;
            }

            satelliteInitializationReplies.reserve(recr.satelliteTLogs.size());
            for (int i = 0; i < recr.satelliteTLogs.size(); i++)
                satelliteInitializationReplies.push_back(transformErrors(
                    throwErrorOr(recr.satelliteTLogs[i].tLog.getReplyUnlessFailedFor(
                        sreqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
                    master_recovery_failed()));

            wait(waitForAll(satelliteInitializationReplies) || oldRouterRecruitment);

            for (int i = 0; i < satelliteInitializationReplies.size(); i++) {
                logSystem->tLogs[1]->logServers[i] = makeReference<AsyncVar<OptionalInterface<TLogInterface>>>(
                    OptionalInterface<TLogInterface>(satelliteInitializationReplies[i].get()));
            }

            for (int i = 0; i < logSystem->tLogs[1]->logServers.size(); i++)
                recoveryComplete.push_back(transformErrors(
                    throwErrorOr(
                        logSystem->tLogs[1]->logServers[i]->get().interf().recoveryFinished.getReplyUnlessFailedFor(
                            TLogRecoveryFinishedRequest(),
                            SERVER_KNOBS->TLOG_TIMEOUT,
                            SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
                    master_recovery_failed()));
        }

        wait(waitForAll(initializationReplies) || oldRouterRecruitment);

        for (int i = 0; i < initializationReplies.size(); i++) {
            logSystem->tLogs[0]->logServers[i] = makeReference<AsyncVar<OptionalInterface<TLogInterface>>>(
                OptionalInterface<TLogInterface>(initializationReplies[i].get()));
            logSystem->tLogs[0]->tLogLocalities[i] = recr.tLogs[i].locality;
        }
        filterLocalityDataForPolicy(logSystem->tLogs[0]->tLogPolicy, &logSystem->tLogs[0]->tLogLocalities);

        // Don't force failure of recovery if it took us a long time to recover. This avoids multiple
        // long-running recoveries causing tests to time out.
        if (BUGGIFY && now() - startTime < 300 && g_network->isSimulated() && g_simulator.speedUpSimulation)
            throw master_recovery_failed();

        for (int i = 0; i < logSystem->tLogs[0]->logServers.size(); i++)
            recoveryComplete.push_back(transformErrors(
                throwErrorOr(
                    logSystem->tLogs[0]->logServers[i]->get().interf().recoveryFinished.getReplyUnlessFailedFor(
                        TLogRecoveryFinishedRequest(),
                        SERVER_KNOBS->TLOG_TIMEOUT,
                        SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)),
                master_recovery_failed()));
        logSystem->recoveryComplete = waitForAll(recoveryComplete);

        if (configuration.usableRegions > 1) {
            logSystem->hasRemoteServers = true;
            logSystem->remoteRecovery = TagPartitionedLogSystem::newRemoteEpoch(logSystem.getPtr(),
                                                                                oldLogSystem,
                                                                                fRemoteWorkers,
                                                                                configuration,
                                                                                recoveryCount,
                                                                                remoteLocality,
                                                                                allTags);
            if (oldLogSystem->tLogs.size() > 0 && oldLogSystem->tLogs[0]->locality == tagLocalitySpecial) {
                // The wait is required so that we know both primary logs and remote logs have copied the data
                // between the known committed version and the recovery version.
                // FIXME: we can remove this wait once we are able to have log routers which can ship data to
                // the remote logs without using log router tags.
                wait(logSystem->remoteRecovery);
            }
        } else {
            logSystem->hasRemoteServers = false;
            logSystem->remoteRecovery = logSystem->recoveryComplete;
            logSystem->remoteRecoveryComplete = logSystem->recoveryComplete;
        }

        return logSystem;
    }
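
    // trackRejoins keeps the cached tlog interfaces fresh: when a known tlog rejoins, it republishes the
    // interface (with its locality filtered for the set's replication policy), parks the reply until the
    // next rejoin, and releases all parked replies when the actor is cancelled. It also warns if some logs
    // have not rejoined within TLOG_SLOW_REJOIN_WARN_TIMEOUT_SECS.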
    ACTOR static Future<Void> trackRejoins(
        UID dbgid,
        std::vector<std::pair<Reference<AsyncVar<OptionalInterface<TLogInterface>>>, Reference<IReplicationPolicy>>>
            logServers,
        FutureStream<struct TLogRejoinRequest> rejoinRequests) {
        state std::map<UID, ReplyPromise<TLogRejoinReply>> lastReply;
        state std::set<UID> logsWaiting;
        state double startTime = now();
        state Future<Void> warnTimeout = delay(SERVER_KNOBS->TLOG_SLOW_REJOIN_WARN_TIMEOUT_SECS);

        for (const auto& log : logServers) {
            logsWaiting.insert(log.first->get().id());
        }

        try {
            loop choose {
                when(TLogRejoinRequest req = waitNext(rejoinRequests)) {
                    int pos = -1;
                    for (int i = 0; i < logServers.size(); i++) {
                        if (logServers[i].first->get().id() == req.myInterface.id()) {
                            pos = i;
                            logsWaiting.erase(logServers[i].first->get().id());
                            break;
                        }
                    }
                    if (pos != -1) {
                        TraceEvent("TLogJoinedMe", dbgid)
                            .detail("TLog", req.myInterface.id())
                            .detail("Address", req.myInterface.commit.getEndpoint().getPrimaryAddress().toString());
                        if (!logServers[pos].first->get().present() ||
                            req.myInterface.commit.getEndpoint() !=
                                logServers[pos].first->get().interf().commit.getEndpoint()) {
                            TLogInterface interf = req.myInterface;
                            filterLocalityDataForPolicyDcAndProcess(logServers[pos].second, &interf.filteredLocality);
                            logServers[pos].first->setUnconditional(OptionalInterface<TLogInterface>(interf));
                        }
                        lastReply[req.myInterface.id()].send(TLogRejoinReply{ false });
                        lastReply[req.myInterface.id()] = req.reply;
                    } else {
                        TraceEvent("TLogJoinedMeUnknown", dbgid)
                            .detail("TLog", req.myInterface.id())
                            .detail("Address", req.myInterface.commit.getEndpoint().getPrimaryAddress().toString());
                        req.reply.send(true);
                    }
                }
                when(wait(warnTimeout)) {
                    for (const auto& logId : logsWaiting) {
                        TraceEvent(SevWarnAlways, "TLogRejoinSlow", dbgid)
                            .detail("Elapsed", now() - startTime)
                            .detail("LogId", logId);
                    }
                    warnTimeout = Never();
                }
            }
        } catch (...) {
            for (auto it = lastReply.begin(); it != lastReply.end(); ++it)
                it->second.send(TLogRejoinReply{ true });
            throw;
        }
    }
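
    // lockTLog retries until the tlog's interface is present, then asks it to lock (stop accepting commits
    // and report how far it got); the TLogLockResult it returns is what the durable-version calculation in
    // newEpoch consumes via oldLogSystem->lockResults.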
    ACTOR static Future<TLogLockResult> lockTLog(UID myID, Reference<AsyncVar<OptionalInterface<TLogInterface>>> tlog) {
        TraceEvent("TLogLockStarted", myID).detail("TLog", tlog->get().id());
        loop {
            choose {
                when(TLogLockResult data =
                         wait(tlog->get().present()
                                  ? brokenPromiseToNever(tlog->get().interf().lock.getReply<TLogLockResult>())
                                  : Never())) {
                    TraceEvent("TLogLocked", myID).detail("TLog", tlog->get().id()).detail("End", data.end);
                    return data;
                }
                when(wait(tlog->onChange())) {}
            }
        }
    }

    // FIXME: disabled during merge, update and use in epochEnd()
    /*
    static void lockMinimalTLogSet(const UID& dbgid, const DBCoreState& prevState,
                                   const std::vector<Reference<AsyncVar<OptionalInterface<TLogInterface>>>>& logServers,
                                   const std::vector<Reference<AsyncVar<bool>>>& logFailed,
                                   vector<Future<TLogLockResult>>* tLogReply) {
        // Invariant: tLogReply[i] must correspond to the tlog stored as logServers[i].
        ASSERT(tLogReply->size() == prevState.tLogLocalities.size());
        ASSERT(logFailed.size() == tLogReply->size());

        // For any given index, only one of the following will be true.
        auto locking_completed = [&logFailed, tLogReply](int index) {
            const auto& entry = tLogReply->at(index);
            return !logFailed[index]->get() && entry.isValid() && entry.isReady() && !entry.isError();
        };
        auto locking_failed = [&logFailed, tLogReply](int index) {
            const auto& entry = tLogReply->at(index);
            return logFailed[index]->get() || (entry.isValid() && entry.isReady() && entry.isError());
        };
        auto locking_pending = [&logFailed, tLogReply](int index) {
            const auto& entry = tLogReply->at(index);
            return !logFailed[index]->get() && (entry.isValid() && !entry.isReady());
        };
        auto locking_skipped = [&logFailed, tLogReply](int index) {
            const auto& entry = tLogReply->at(index);
            return !logFailed[index]->get() && !entry.isValid();
        };

        auto can_obtain_quorum = [&prevState](std::function<bool(int)> filter) {
            LocalityGroup filter_true;
            std::vector<LocalityData> filter_false, unused;
            for (int i = 0; i < prevState.tLogLocalities.size(); i++) {
                if (filter(i)) {
                    filter_true.add(prevState.tLogLocalities[i]);
                } else {
                    filter_false.push_back(prevState.tLogLocalities[i]);
                }
            }
            bool valid = filter_true.validate(prevState.tLogPolicy);
            if (!valid && prevState.tLogWriteAntiQuorum > 0) {
                valid = !validateAllCombinations(unused, filter_true, prevState.tLogPolicy, filter_false,
                                                 prevState.tLogWriteAntiQuorum, false);
            }
            return valid;
        };

        // Step 1: Verify that if all the failed TLogs come back, they can't form a quorum.
        if (can_obtain_quorum(locking_failed)) {
            TraceEvent(SevInfo, "MasterRecoveryTLogLockingImpossible", dbgid);
            return;
        }

        // Step 2: It's possible for us to succeed, but we need to lock additional logs.
        //
        // First, we need an accurate picture of what TLogs we're capable of locking. We can't tell the
        // difference between a temporarily failed TLog and a permanently failed TLog. Thus, we assume
        // all failures are permanent, and manually re-issue lock requests if they rejoin.
        for (int i = 0; i < logFailed.size(); i++) {
            const auto& r = tLogReply->at(i);
            TEST(locking_failed(i) && (r.isValid() && !r.isReady())); // A TLog failed with a pending request.
            // The reboot_a_tlog BUGGIFY below should cause the above case to be hit.
            if (locking_failed(i)) {
                tLogReply->at(i) = Future<TLogLockResult>();
            }
        }

        // We're trying to partition the set of old tlogs into two sets, L and R, such that:
        // (1). R does not validate the policy
        // (2). |R| is as large as possible
        // (3). L contains all the already-locked TLogs
        // and then we only issue lock requests to TLogs in L. This is safe, as R does not have quorum,
        // so no commits may occur. It does not matter if L forms a quorum or not.
        //
        // We form these sets by starting with L as all machines and R as the empty set, and moving a
        // random machine from L to R until (1) or (2) no longer holds as true. Code-wise, L is
        // [0..end-can_omit), and R is [end-can_omit..end), and we move a random machine via randomizing
        // the order of the tlogs. Choosing a random machine was verified to generate a good-enough
        // result to be interesting in tests sufficiently frequently that we don't need to try to
        // calculate the exact optimal solution.
        std::vector<std::pair<LocalityData, int>> tlogs;
        for (int i = 0; i < prevState.tLogLocalities.size(); i++) {
            tlogs.emplace_back(prevState.tLogLocalities[i], i);
        }
        deterministicRandom()->randomShuffle(tlogs);
        // Rearrange the array such that the left side holds logs that are closer to being locked, and the
        // right side holds logs that can't be locked. This makes us prefer locking already-locked TLogs,
        // which is how we respect the decisions made in the previous execution.
        auto idx_to_order = [&locking_completed, &locking_failed, &locking_pending, &locking_skipped](int index) {
            bool complete = locking_completed(index);
            bool pending = locking_pending(index);
            bool skipped = locking_skipped(index);
            bool failed = locking_failed(index);

            ASSERT(complete + pending + skipped + failed == 1);

            if (complete) return 0;
            if (pending) return 1;
            if (skipped) return 2;
            if (failed) return 3;

            ASSERT(false); // Programmer error.
            return -1;
        };
        std::sort(tlogs.begin(), tlogs.end(),
                  // TODO: Change long type to `auto` once toolchain supports C++17.
                  [&idx_to_order](const std::pair<LocalityData, int>& lhs, const std::pair<LocalityData, int>& rhs) {
                      return idx_to_order(lhs.second) < idx_to_order(rhs.second);
                  });

        // Indexes that aren't in the vector are the ones we're considering omitting. Remove indexes until
        // the removed set forms a quorum.
        int can_omit = 0;
        std::vector<int> to_lock_indexes;
        for (auto it = tlogs.cbegin(); it != tlogs.cend() - 1; it++) {
            to_lock_indexes.push_back(it->second);
        }
        auto filter = [&to_lock_indexes](int index) {
            return std::find(to_lock_indexes.cbegin(), to_lock_indexes.cend(), index) == to_lock_indexes.cend();
        };
        while (true) {
            if (can_obtain_quorum(filter)) {
                break;
            } else {
                can_omit++;
                ASSERT(can_omit < tlogs.size());
                to_lock_indexes.pop_back();
            }
        }

        if (prevState.tLogReplicationFactor - prevState.tLogWriteAntiQuorum == 1) {
            ASSERT(can_omit == 0);
        }
        // Our previous check of making sure there aren't too many failed logs should have prevented this.
        ASSERT(!locking_failed(tlogs[tlogs.size() - can_omit - 1].second));

        // If we've managed to leave more tlogs unlocked than (RF-AQ), it means we've hit the case
        // where the policy engine has allowed us to have multiple logs in the same failure domain
        // with independent sets of data. This case validates that no code is relying on the old
        // quorum=(RF-AQ) logic, and now goes through the policy engine instead.
        TEST(can_omit >= prevState.tLogReplicationFactor -
                             prevState.tLogWriteAntiQuorum); // Locking a subset of the TLogs while ending an epoch.

        const bool reboot_a_tlog = g_network->now() - g_simulator.lastConnectionFailure >
                                       g_simulator.connectionFailuresDisableDuration &&
                                   BUGGIFY && deterministicRandom()->random01() < 0.25;
        TraceEvent(SevInfo, "MasterRecoveryTLogLocking", dbgid)
            .detail("Locks", tlogs.size() - can_omit)
            .detail("Skipped", can_omit)
            .detail("Replication", prevState.tLogReplicationFactor)
            .detail("Antiquorum", prevState.tLogWriteAntiQuorum)
            .detail("RebootBuggify", reboot_a_tlog);
        for (int i = 0; i < tlogs.size() - can_omit; i++) {
            const int index = tlogs[i].second;
            Future<TLogLockResult>& entry = tLogReply->at(index);
            if (!entry.isValid()) {
                entry = lockTLog(dbgid, logServers[index]);
            }
        }
        if (reboot_a_tlog) {
            g_simulator.lastConnectionFailure = g_network->now();
            for (int i = 0; i < tlogs.size() - can_omit; i++) {
                const int index = tlogs[i].second;
                if (logServers[index]->get().present()) {
                    g_simulator.rebootProcess(
                        g_simulator.getProcessByAddress(logServers[index]->get().interf().address()),
                        ISimulator::RebootProcess);
                    break;
                }
            }
        }
        // Intentionally leave `tlogs.size() - can_omit` .. `tlogs.size()` as !isValid() Futures.
    }*/

    template <class T>
    static vector<T> getReadyNonError(vector<Future<T>> const& futures) {
        // Return the values of those futures which have (non-error) values ready
        std::vector<T> result;
        for (auto& f : futures)
            if (f.isReady() && !f.isError())
                result.push_back(f.get());
        return result;
    }

    struct sort_by_end {
        bool operator()(TLogLockResult const& a, TLogLockResult const& b) const { return a.end < b.end; }
    };
};
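
// The ILogSystem entry points below simply forward to the TagPartitionedLogSystem implementation,
// dispatching on LogSystemType where a configuration is involved.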
Future<Void> ILogSystem::recoverAndEndEpoch(Reference<AsyncVar<Reference<ILogSystem>>> const& outLogSystem,
                                            UID const& dbgid,
                                            DBCoreState const& oldState,
                                            FutureStream<TLogRejoinRequest> const& rejoins,
                                            LocalityData const& locality,
                                            bool* forceRecovery) {
    return TagPartitionedLogSystem::recoverAndEndEpoch(outLogSystem, dbgid, oldState, rejoins, locality, forceRecovery);
}

Reference<ILogSystem> ILogSystem::fromLogSystemConfig(UID const& dbgid,
                                                      struct LocalityData const& locality,
                                                      struct LogSystemConfig const& conf,
                                                      bool excludeRemote,
                                                      bool useRecoveredAt,
                                                      Optional<PromiseStream<Future<Void>>> addActor) {
    if (conf.logSystemType == LogSystemType::empty)
        return Reference<ILogSystem>();
    else if (conf.logSystemType == LogSystemType::tagPartitioned)
        return TagPartitionedLogSystem::fromLogSystemConfig(
            dbgid, locality, conf, excludeRemote, useRecoveredAt, addActor);
    else
        throw internal_error();
}

Reference<ILogSystem> ILogSystem::fromOldLogSystemConfig(UID const& dbgid,
                                                         struct LocalityData const& locality,
                                                         struct LogSystemConfig const& conf) {
    if (conf.logSystemType == LogSystemType::empty)
        return Reference<ILogSystem>();
    else if (conf.logSystemType == LogSystemType::tagPartitioned)
        return TagPartitionedLogSystem::fromOldLogSystemConfig(dbgid, locality, conf);
    else
        throw internal_error();
}

Reference<ILogSystem> ILogSystem::fromServerDBInfo(UID const& dbgid,
                                                   ServerDBInfo const& dbInfo,
                                                   bool useRecoveredAt,
                                                   Optional<PromiseStream<Future<Void>>> addActor) {
    return fromLogSystemConfig(dbgid, dbInfo.myLocality, dbInfo.logSystemConfig, false, useRecoveredAt, addActor);
}