foundationdb/fdbserver/TagPartitionedLogSystem.act...

397 lines
18 KiB
C++

/*
* TagPartitionedLogSystem.actor.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_TAGPARTITIONEDLOGSYSTEM_ACTOR_G_H)
#define FDBSERVER_TAGPARTITIONEDLOGSYSTEM_ACTOR_G_H
#include "fdbserver/TagPartitionedLogSystem.actor.g.h"
#elif !defined(FDBSERVER_TAGPARTITIONEDLOGSYSTEM_ACTOR_H)
#define FDBSERVER_TAGPARTITIONEDLOGSYSTEM_ACTOR_H
#pragma once
#include "fdbclient/SystemData.h"
#include "fdbrpc/Replication.h"
#include "fdbrpc/ReplicationUtils.h"
#include "fdbrpc/simulator.h"
#include "fdbserver/DBCoreState.h"
#include "fdbserver/Knobs.h"
#include "fdbserver/LogProtocolMessage.h"
#include "fdbserver/LogSystem.h"
#include "fdbserver/RecoveryState.h"
#include "fdbserver/ServerDBInfo.h"
#include "fdbserver/WaitFailure.h"
#include "flow/ActorCollection.h"
#include "flow/actorcompiler.h" // This must be the last #include.
// TagPartitionedLogSystem info in old epoch
struct OldLogData {
std::vector<Reference<LogSet>> tLogs;
int32_t logRouterTags;
int32_t txsTags; // The number of txsTags, which may change across generations.
Version epochBegin, epochEnd;
std::set<int8_t> pseudoLocalities;
LogEpoch epoch;
OldLogData() : logRouterTags(0), txsTags(0), epochBegin(0), epochEnd(0), epoch(0) {}
// Constructor for T of OldTLogConf and OldTLogCoreData
template <class T>
explicit OldLogData(const T& conf)
: logRouterTags(conf.logRouterTags), txsTags(conf.txsTags), epochBegin(conf.epochBegin), epochEnd(conf.epochEnd),
pseudoLocalities(conf.pseudoLocalities), epoch(conf.epoch) {
tLogs.resize(conf.tLogs.size());
for (int j = 0; j < conf.tLogs.size(); j++) {
auto logSet = makeReference<LogSet>(conf.tLogs[j]);
tLogs[j] = logSet;
}
}
};
struct LogLockInfo {
Version epochEnd;
bool isCurrent;
Reference<LogSet> logSet;
std::vector<Future<TLogLockResult>> replies;
LogLockInfo() : epochEnd(std::numeric_limits<Version>::max()), isCurrent(false) {}
};
struct TagPartitionedLogSystem final : ILogSystem, ReferenceCounted<TagPartitionedLogSystem> {
const UID dbgid;
LogSystemType logSystemType;
std::vector<Reference<LogSet>> tLogs; // LogSets in different locations: primary, satellite, or remote
int expectedLogSets;
int logRouterTags;
int txsTags;
UID recruitmentID;
int repopulateRegionAntiQuorum;
bool stopped;
std::set<int8_t> pseudoLocalities; // Represent special localities that will be mapped to tagLocalityLogRouter
const LogEpoch epoch;
LogEpoch oldestBackupEpoch;
// new members
std::map<Tag, Version> pseudoLocalityPopVersion;
Future<Void> rejoins;
Future<Void> recoveryComplete;
Future<Void> remoteRecovery;
Future<Void> remoteRecoveryComplete;
std::vector<LogLockInfo> lockResults;
AsyncVar<bool> recoveryCompleteWrittenToCoreState;
bool remoteLogsWrittenToCoreState;
bool hasRemoteServers;
AsyncTrigger backupWorkerChanged;
std::set<UID> removedBackupWorkers; // Workers that are removed before setting them.
Optional<Version> recoverAt;
Optional<Version> recoveredAt;
Version knownCommittedVersion;
Version backupStartVersion = invalidVersion; // max(tLogs[0].startVersion, previous epochEnd).
std::map<UID, Version> rvLogs; // recovery versions per tlog
LocalityData locality;
// For each currently running popFromLog actor, outstandingPops is
// (logID, tag)->(max popped version, durableKnownCommittedVersion).
// Why do we need durableKnownCommittedVersion? knownCommittedVersion gives the lower bound of what data
// will need to be copied into the next generation to restore the replication factor.
// Guess: It probably serves as a minimum version of what data should be on a TLog in the next generation and
// sending a pop for anything less than durableKnownCommittedVersion for the TLog will be absurd.
std::map<std::pair<UID, Tag>, std::pair<Version, Version>> outstandingPops;
// Stores each <log router, tag> pair's last popped version. This is used to determine whether we need to pop an old
// generation log router.
std::map<std::pair<UID, Tag>, Version> logRouterLastPops;
Optional<PromiseStream<Future<Void>>> addActor;
ActorCollection popActors;
std::vector<OldLogData> oldLogData; // each element has the log info. in one old epoch.
AsyncTrigger logSystemConfigChanged;
TagPartitionedLogSystem(UID dbgid,
LocalityData locality,
LogEpoch e,
Optional<PromiseStream<Future<Void>>> addActor = Optional<PromiseStream<Future<Void>>>())
: dbgid(dbgid), logSystemType(LogSystemType::empty), expectedLogSets(0), logRouterTags(0), txsTags(0),
repopulateRegionAntiQuorum(0), stopped(false), epoch(e), oldestBackupEpoch(0),
recoveryCompleteWrittenToCoreState(false), remoteLogsWrittenToCoreState(false), hasRemoteServers(false),
locality(locality), addActor(addActor), popActors(false) {}
void stopRejoins() final;
void addref() final;
void delref() final;
std::string describe() const final;
UID getDebugID() const final;
void addPseudoLocality(int8_t locality);
Tag getPseudoPopTag(Tag tag, ProcessClass::ClassType type) const final;
bool hasPseudoLocality(int8_t locality) const final;
// Return the min version of all pseudoLocalities, i.e., logRouter and backupTag
Version popPseudoLocalityTag(Tag tag, Version upTo) final;
static Future<Void> recoverAndEndEpoch(Reference<AsyncVar<Reference<ILogSystem>>> const& outLogSystem,
UID const& dbgid,
DBCoreState const& oldState,
FutureStream<TLogRejoinRequest> const& rejoins,
LocalityData const& locality,
bool* forceRecovery);
static Reference<ILogSystem> fromLogSystemConfig(UID const& dbgid,
LocalityData const& locality,
LogSystemConfig const& lsConf,
bool excludeRemote,
bool useRecoveredAt,
Optional<PromiseStream<Future<Void>>> addActor);
static Reference<ILogSystem> fromOldLogSystemConfig(UID const& dbgid,
LocalityData const& locality,
LogSystemConfig const& lsConf);
// Convert TagPartitionedLogSystem to DBCoreState and override input newState as return value
void toCoreState(DBCoreState& newState) final;
bool remoteStorageRecovered() final;
Future<Void> onCoreStateChanged() final;
void coreStateWritten(DBCoreState const& newState) final;
Future<Void> onError() final;
ACTOR static Future<Void> onError_internal(TagPartitionedLogSystem* self);
ACTOR static Future<Void> pushResetChecker(Reference<ConnectionResetInfo> self, NetworkAddress addr);
ACTOR static Future<TLogCommitReply> recordPushMetrics(Reference<ConnectionResetInfo> self,
Reference<Histogram> dist,
NetworkAddress addr,
Future<TLogCommitReply> in);
Future<Version> push(Version prevVersion,
Version version,
Version knownCommittedVersion,
Version minKnownCommittedVersion,
LogPushData& data,
SpanContext const& spanContext,
Optional<UID> debugID,
Optional<std::unordered_map<uint16_t, Version>> tpcvMap) final;
Reference<IPeekCursor> peekAll(UID dbgid, Version begin, Version end, Tag tag, bool parallelGetMore);
Reference<IPeekCursor> peekRemote(UID dbgid, Version begin, Optional<Version> end, Tag tag, bool parallelGetMore);
Reference<IPeekCursor> peek(UID dbgid, Version begin, Optional<Version> end, Tag tag, bool parallelGetMore) final;
Reference<IPeekCursor> peek(UID dbgid,
Version begin,
Optional<Version> end,
std::vector<Tag> tags,
bool parallelGetMore) final;
Reference<IPeekCursor> peekLocal(UID dbgid,
Tag tag,
Version begin,
Version end,
bool useMergePeekCursors,
int8_t peekLocality = tagLocalityInvalid);
Reference<IPeekCursor> peekTxs(UID dbgid,
Version begin,
int8_t peekLocality,
Version localEnd,
bool canDiscardPopped) final;
Reference<IPeekCursor> peekSingle(UID dbgid,
Version begin,
Tag tag,
std::vector<std::pair<Version, Tag>> history) final;
// LogRouter or BackupWorker use this function to obtain a cursor for peeking tlogs of a generation (i.e., epoch).
// Specifically, the epoch is determined by looking up "dbgid" in tlog sets of generations.
// The returned cursor can peek data at the "tag" from the given "begin" version to that epoch's end version or
// the recovery version for the latest old epoch. For the current epoch, the cursor has no end version.
Reference<IPeekCursor> peekLogRouter(UID dbgid, Version begin, Tag tag) final;
Version getKnownCommittedVersion() final;
Future<Void> onKnownCommittedVersionChange() final;
void popLogRouter(Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality);
void popTxs(Version upTo, int8_t popLocality) final;
// pop 'tag.locality' type data up to the 'upTo' version
void pop(Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality) final;
// pop tag from log up to the version defined in self->outstandingPops[].first
ACTOR static Future<Void> popFromLog(TagPartitionedLogSystem* self,
Reference<AsyncVar<OptionalInterface<TLogInterface>>> log,
Tag tag,
double delayBeforePop,
bool popLogRouter);
ACTOR static Future<Version> getPoppedFromTLog(Reference<AsyncVar<OptionalInterface<TLogInterface>>> log, Tag tag);
ACTOR static Future<Version> getPoppedTxs(TagPartitionedLogSystem* self);
Future<Version> getTxsPoppedVersion() final;
ACTOR static Future<Void> confirmEpochLive_internal(Reference<LogSet> logSet, Optional<UID> debugID);
// Returns success after confirming that pushes in the current epoch are still possible
Future<Void> confirmEpochLive(Optional<UID> debugID) final;
Future<Void> endEpoch() final;
// Call only after end_epoch() has successfully completed. Returns a new epoch immediately following this one.
// The new epoch is only provisional until the caller updates the coordinated DBCoreState.
Future<Reference<ILogSystem>> newEpoch(RecruitFromConfigurationReply const& recr,
Future<RecruitRemoteFromConfigurationReply> const& fRemoteWorkers,
UID clusterId,
DatabaseConfiguration const& config,
LogEpoch recoveryCount,
Version recoveryTransactionVersion,
int8_t primaryLocality,
int8_t remoteLocality,
std::vector<Tag> const& allTags,
Reference<AsyncVar<bool>> const& recruitmentStalled) final;
LogSystemConfig getLogSystemConfig() const final;
Standalone<StringRef> getLogsValue() const final;
Future<Void> onLogSystemConfigChange() final;
Version getEnd() const final;
Version getPeekEnd() const;
void getPushLocations(VectorRef<Tag> tags, std::vector<int>& locations, bool allLocations) const final;
bool hasRemoteLogs() const final;
Tag getRandomRouterTag() const final;
Tag getRandomTxsTag() const final;
TLogVersion getTLogVersion() const final;
int getLogRouterTags() const final;
Version getBackupStartVersion() const final;
std::map<LogEpoch, ILogSystem::EpochTagsVersionsInfo> getOldEpochTagsVersionsInfo() const final;
inline Reference<LogSet> getEpochLogSet(LogEpoch epoch) const;
void setBackupWorkers(const std::vector<InitializeBackupReply>& replies) final;
bool removeBackupWorker(const BackupWorkerDoneRequest& req) final;
LogEpoch getOldestBackupEpoch() const final;
void setOldestBackupEpoch(LogEpoch epoch) final;
ACTOR static Future<Void> monitorLog(Reference<AsyncVar<OptionalInterface<TLogInterface>>> logServer,
Reference<AsyncVar<bool>> failed);
// returns the log group's knownComittedVersion, DV, and a vector of TLogLockResults for each tLog in the group.
Optional<std::tuple<Version, Version, std::vector<TLogLockResult>>> static getDurableVersion(
UID dbgid,
LogLockInfo lockInfo,
std::vector<Reference<AsyncVar<bool>>> failed = std::vector<Reference<AsyncVar<bool>>>(),
Optional<Version> lastEnd = Optional<Version>());
ACTOR static Future<Void> getDurableVersionChanged(
LogLockInfo lockInfo,
std::vector<Reference<AsyncVar<bool>>> failed = std::vector<Reference<AsyncVar<bool>>>());
ACTOR static Future<Void> epochEnd(Reference<AsyncVar<Reference<ILogSystem>>> outLogSystem,
UID dbgid,
DBCoreState prevState,
FutureStream<TLogRejoinRequest> rejoinRequests,
LocalityData locality,
bool* forceRecovery);
ACTOR static Future<Void> recruitOldLogRouters(TagPartitionedLogSystem* self,
std::vector<WorkerInterface> workers,
LogEpoch recoveryCount,
int8_t locality,
Version startVersion,
std::vector<LocalityData> tLogLocalities,
Reference<IReplicationPolicy> tLogPolicy,
bool forRemote);
static Version getMaxLocalStartVersion(const std::vector<Reference<LogSet>>& tLogs);
static std::vector<Tag> getLocalTags(int8_t locality, const std::vector<Tag>& allTags);
ACTOR static Future<Void> newRemoteEpoch(TagPartitionedLogSystem* self,
Reference<TagPartitionedLogSystem> oldLogSystem,
Future<RecruitRemoteFromConfigurationReply> fRemoteWorkers,
UID clusterId,
DatabaseConfiguration configuration,
LogEpoch recoveryCount,
Version recoveryTransactionVersion,
int8_t remoteLocality,
std::vector<Tag> allTags);
ACTOR static Future<Reference<ILogSystem>> newEpoch(Reference<TagPartitionedLogSystem> oldLogSystem,
RecruitFromConfigurationReply recr,
Future<RecruitRemoteFromConfigurationReply> fRemoteWorkers,
UID clusterId,
DatabaseConfiguration configuration,
LogEpoch recoveryCount,
Version recoveryTransactionVersion,
int8_t primaryLocality,
int8_t remoteLocality,
std::vector<Tag> allTags,
Reference<AsyncVar<bool>> recruitmentStalled);
ACTOR static Future<Void> trackRejoins(
UID dbgid,
std::vector<std::pair<Reference<AsyncVar<OptionalInterface<TLogInterface>>>, Reference<IReplicationPolicy>>>
logServers,
FutureStream<struct TLogRejoinRequest> rejoinRequests);
ACTOR static Future<TLogLockResult> lockTLog(UID myID, Reference<AsyncVar<OptionalInterface<TLogInterface>>> tlog);
template <class T>
static std::vector<T> getReadyNonError(std::vector<Future<T>> const& futures);
};
template <class T>
std::vector<T> TagPartitionedLogSystem::getReadyNonError(std::vector<Future<T>> const& futures) {
// Return the values of those futures which have (non-error) values ready
std::vector<T> result;
for (auto& f : futures)
if (f.isReady() && !f.isError())
result.push_back(f.get());
return result;
}
#include "flow/unactorcompiler.h"
#endif // FDBSERVER_TAGPARTITIONEDLOGSYSTEM_ACTOR_H