204 lines
7.8 KiB
C++
204 lines
7.8 KiB
C++
/*
|
|
* DBCoreState.h
|
|
*
|
|
* This source file is part of the FoundationDB open source project
|
|
*
|
|
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifndef FDBSERVER_DBCORESTATE_H
|
|
#define FDBSERVER_DBCORESTATE_H
|
|
|
|
#include <set>
|
|
#include <vector>
|
|
|
|
#include "fdbclient/FDBTypes.h"
|
|
#include "fdbrpc/ReplicationPolicy.h"
|
|
#include "fdbserver/LogSystemConfig.h"
|
|
#include "fdbserver/MasterInterface.h"
|
|
|
|
class LogSet;
|
|
struct OldLogData;
|
|
|
|
// This structure is stored persistently in CoordinatedState and must be versioned carefully!
|
|
// It records a synchronous replication topology which can be used in the absence of faults (or under a limited
|
|
// number of failures, in the case of less than full write quorums) to durably commit transactions. When faults or
|
|
// configuration changes require the topology to be changed, (a read quorum of) the old topology is locked, a new
|
|
// topology is constructed, and then committed to this coordinated state before becoming active. This process
|
|
// is called 'recovery'.
|
|
// At the moment, transaction logs are replicated but not partitioned, so the topology is as simple as a list of
|
|
// transaction log replicas and the write quorum that was used to commit to them. The read quorum required to
|
|
// ensure durability of locking and recovery is therefore tLogWriteAntiQuorum + 1.
|
|
struct CoreTLogSet {
|
|
std::vector<UID> tLogs;
|
|
std::vector<UID> backupWorkers;
|
|
int32_t tLogWriteAntiQuorum; // The write anti quorum previously used to write to tLogs, which might be different from the anti quorum suggested by the current configuration going forward!
|
|
int32_t tLogReplicationFactor; // The replication factor previously used to write to tLogs, which might be different from the current configuration
|
|
std::vector< LocalityData > tLogLocalities; // Stores the localities of the log servers
|
|
Reference<IReplicationPolicy> tLogPolicy;
|
|
bool isLocal;
|
|
int8_t locality;
|
|
Version startVersion;
|
|
std::vector<std::vector<int>> satelliteTagLocations;
|
|
TLogVersion tLogVersion;
|
|
|
|
CoreTLogSet() : tLogWriteAntiQuorum(0), tLogReplicationFactor(0), isLocal(true), locality(tagLocalityUpgraded), startVersion(invalidVersion) {}
|
|
explicit CoreTLogSet(const LogSet& logset);
|
|
|
|
bool operator==(CoreTLogSet const& rhs) const {
|
|
return tLogs == rhs.tLogs && backupWorkers == rhs.backupWorkers &&
|
|
tLogWriteAntiQuorum == rhs.tLogWriteAntiQuorum && tLogReplicationFactor == rhs.tLogReplicationFactor &&
|
|
isLocal == rhs.isLocal && satelliteTagLocations == rhs.satelliteTagLocations &&
|
|
locality == rhs.locality && startVersion == rhs.startVersion &&
|
|
((!tLogPolicy && !rhs.tLogPolicy) ||
|
|
(tLogPolicy && rhs.tLogPolicy && (tLogPolicy->info() == rhs.tLogPolicy->info())));
|
|
}
|
|
|
|
template <class Archive>
|
|
void serialize(Archive& ar) {
|
|
serializer(ar, tLogs, tLogWriteAntiQuorum, tLogReplicationFactor, tLogPolicy, tLogLocalities, isLocal, locality, startVersion, satelliteTagLocations);
|
|
if (ar.isDeserializing && !ar.protocolVersion().hasTLogVersion()) {
|
|
tLogVersion = TLogVersion::V2;
|
|
} else {
|
|
serializer(ar, tLogVersion);
|
|
}
|
|
if (ar.protocolVersion() > 0x0FDB00B061070001LL) {
|
|
serializer(ar, backupWorkers);
|
|
}
|
|
}
|
|
};
|
|
|
|
struct OldTLogCoreData {
|
|
std::vector<CoreTLogSet> tLogs;
|
|
int32_t logRouterTags;
|
|
int32_t txsTags;
|
|
Version epochEnd;
|
|
std::set<int8_t> pseudoLocalities;
|
|
LogEpoch epoch;
|
|
|
|
OldTLogCoreData() : epochEnd(0), logRouterTags(0), txsTags(0), epoch(0) {}
|
|
explicit OldTLogCoreData(const OldLogData&);
|
|
|
|
bool operator==(const OldTLogCoreData& rhs) const {
|
|
return tLogs == rhs.tLogs && logRouterTags == rhs.logRouterTags && txsTags == rhs.txsTags &&
|
|
epochEnd == rhs.epochEnd && pseudoLocalities == rhs.pseudoLocalities && epoch == rhs.epoch;
|
|
}
|
|
|
|
template <class Archive>
|
|
void serialize(Archive& ar) {
|
|
if( ar.protocolVersion().hasTagLocality()) {
|
|
serializer(ar, tLogs, logRouterTags, epochEnd);
|
|
}
|
|
else if(ar.isDeserializing) {
|
|
tLogs.push_back(CoreTLogSet());
|
|
serializer(ar, tLogs[0].tLogs, tLogs[0].tLogWriteAntiQuorum, tLogs[0].tLogReplicationFactor, tLogs[0].tLogPolicy, epochEnd, tLogs[0].tLogLocalities);
|
|
tLogs[0].tLogVersion = TLogVersion::V2;
|
|
}
|
|
if (ar.protocolVersion().hasPseudoLocalities()) {
|
|
serializer(ar, pseudoLocalities);
|
|
}
|
|
if (ar.protocolVersion().hasShardedTxsTags()) {
|
|
serializer(ar, txsTags);
|
|
}
|
|
if (ar.protocolVersion().hasBackupWorker()) {
|
|
serializer(ar, epoch);
|
|
}
|
|
}
|
|
};
|
|
|
|
struct DBCoreState {
|
|
std::vector<CoreTLogSet> tLogs;
|
|
int32_t logRouterTags;
|
|
int32_t txsTags;
|
|
std::vector<OldTLogCoreData> oldTLogData;
|
|
DBRecoveryCount recoveryCount; // Increases with sequential successful recoveries.
|
|
LogSystemType logSystemType;
|
|
std::set<int8_t> pseudoLocalities;
|
|
LogEpoch epoch;
|
|
|
|
DBCoreState() : logRouterTags(0), txsTags(0), recoveryCount(0), logSystemType(LogSystemType::empty), epoch(0) {}
|
|
|
|
vector<UID> getPriorCommittedLogServers() {
|
|
vector<UID> priorCommittedLogServers;
|
|
for(auto& it : tLogs) {
|
|
for(auto& log : it.tLogs) {
|
|
priorCommittedLogServers.push_back(log);
|
|
}
|
|
}
|
|
for(int i = 0; i < oldTLogData.size(); i++) {
|
|
for(auto& it : oldTLogData[i].tLogs) {
|
|
for(auto& log : it.tLogs) {
|
|
priorCommittedLogServers.push_back(log);
|
|
}
|
|
}
|
|
}
|
|
return priorCommittedLogServers;
|
|
}
|
|
|
|
bool isEqual(const DBCoreState& r) const {
|
|
return logSystemType == r.logSystemType && recoveryCount == r.recoveryCount && tLogs == r.tLogs &&
|
|
oldTLogData == r.oldTLogData && logRouterTags == r.logRouterTags && txsTags == r.txsTags &&
|
|
pseudoLocalities == r.pseudoLocalities && epoch == r.epoch;
|
|
}
|
|
bool operator==(const DBCoreState& rhs) const { return isEqual(rhs); }
|
|
|
|
template <class Archive>
|
|
void serialize(Archive& ar) {
|
|
//FIXME: remove when we no longer need to test upgrades from 4.X releases
|
|
if(g_network->isSimulated() && !ar.protocolVersion().hasMultiGenerationTLog()) {
|
|
TraceEvent("ElapsedTime").detail("SimTime", now()).detail("RealTime", 0).detail("RandomUnseed", 0);
|
|
flushAndExit(0);
|
|
}
|
|
|
|
ASSERT(ar.protocolVersion().hasMultiGenerationTLog());
|
|
if(ar.protocolVersion().hasTagLocality()) {
|
|
serializer(ar, tLogs, logRouterTags, oldTLogData, recoveryCount, logSystemType);
|
|
if (ar.protocolVersion().hasPseudoLocalities()) {
|
|
serializer(ar, pseudoLocalities);
|
|
}
|
|
if (ar.protocolVersion().hasShardedTxsTags()) {
|
|
serializer(ar, txsTags);
|
|
}
|
|
if (ar.protocolVersion().hasBackupWorker()) {
|
|
serializer(ar, epoch); // TODO: serialize epoch in higher version?
|
|
}
|
|
} else if(ar.isDeserializing) {
|
|
tLogs.push_back(CoreTLogSet());
|
|
serializer(ar, tLogs[0].tLogs, tLogs[0].tLogWriteAntiQuorum, recoveryCount, tLogs[0].tLogReplicationFactor, logSystemType);
|
|
tLogs[0].tLogVersion = TLogVersion::V2;
|
|
|
|
uint64_t tLocalitySize = (uint64_t)tLogs[0].tLogLocalities.size();
|
|
serializer(ar, oldTLogData, tLogs[0].tLogPolicy, tLocalitySize);
|
|
if (ar.isDeserializing) {
|
|
tLogs[0].tLogLocalities.reserve(tLocalitySize);
|
|
for (size_t i = 0; i < tLocalitySize; i++) {
|
|
LocalityData locality;
|
|
serializer(ar, locality);
|
|
tLogs[0].tLogLocalities.push_back(locality);
|
|
}
|
|
|
|
if(oldTLogData.size()) {
|
|
tLogs[0].startVersion = oldTLogData[0].epochEnd;
|
|
for(int i = 0; i < oldTLogData.size() - 1; i++) {
|
|
oldTLogData[i].tLogs[0].startVersion = oldTLogData[i+1].epochEnd;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
#endif
|