314 lines
12 KiB
C++
314 lines
12 KiB
C++
/*
 * ClusterRecovery.actor.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
#pragma once
|
|
|
|
// When actually compiled (NO_INTELLISENSE), include the generated version of this file. In intellisense use the source
// version.
|
|
#include "flow/Trace.h"
|
|
#include <utility>
|
|
|
|
#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_CLUSTERRECOVERY_ACTOR_G_H)
|
|
#define FDBSERVER_CLUSTERRECOVERY_ACTOR_G_H
|
|
#include "fdbserver/ClusterRecovery.actor.g.h"
|
|
#elif !defined(FDBSERVER_CLUSTERRECOVERY_ACTOR_H)
|
|
#define FDBSERVER_CLUSTERRECOVERY_ACTOR_H
|
|
|
|
#include "fdbclient/DatabaseContext.h"
|
|
#include "fdbrpc/Replication.h"
|
|
#include "fdbrpc/ReplicationUtils.h"
|
|
#include "fdbserver/CoordinatedState.h"
|
|
#include "fdbserver/CoordinationInterface.h" // copy constructors for ServerCoordinators class
|
|
#include "fdbserver/ClusterController.actor.h"
|
|
#include "fdbserver/DBCoreState.h"
|
|
#include "fdbserver/Knobs.h"
|
|
#include "fdbserver/LogSystem.h"
|
|
#include "fdbserver/LogSystemConfig.h"
|
|
#include "fdbserver/LogSystemDiskQueueAdapter.h"
|
|
#include "fdbserver/MoveKeys.actor.h"
|
|
#include "fdbserver/WorkerInterface.actor.h"
|
|
#include "flow/Error.h"
|
|
#include "flow/SystemMonitor.h"
|
|
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
|
|
|
// Identifies the trace events used by cluster recovery. The enumerators are unscoped and numbered
// consecutively from 0; a value of this type is what getRecoveryEventName() takes to look up an event name.
enum ClusterRecoveryEventType {
	CLUSTER_RECOVERY_STATE_EVENT_NAME,
	CLUSTER_RECOVERY_COMMIT_TLOG_EVENT_NAME,
	CLUSTER_RECOVERY_DURATION_EVENT_NAME,
	CLUSTER_RECOVERY_GENERATION_EVENT_NAME,
	CLUSTER_RECOVERY_SS_RECRUITMENT_EVENT_NAME,
	CLUSTER_RECOVERY_INVALID_CONFIG_EVENT_NAME,
	CLUSTER_RECOVERY_RECOVERING_EVENT_NAME,
	CLUSTER_RECOVERY_RECOVERED_EVENT_NAME,
	CLUSTER_RECOVERY_SNAPSHOT_CHECK_EVENT_NAME,
	CLUSTER_RECOVERY_PAUSE_AGENT_BACKUP_EVENT_NAME,
	CLUSTER_RECOVERY_COMMIT_EVENT_NAME,
	CLUSTER_RECOVERY_AVAILABLE_EVENT_NAME,
	CLUSTER_RECOVERY_METRICS_EVENT_NAME,
	CLUSTER_RECOVERY_LAST // Always the last entry
};
|
|
|
|
ACTOR Future<Void> recoveryTerminateOnConflict(UID dbgid,
|
|
Promise<Void> fullyRecovered,
|
|
Future<Void> onConflict,
|
|
Future<Void> switchedState);
|
|
std::string& getRecoveryEventName(ClusterRecoveryEventType type);
|
|
|
|
class ReusableCoordinatedState : NonCopyable {
|
|
public:
|
|
Promise<Void> fullyRecovered;
|
|
DBCoreState prevDBState;
|
|
DBCoreState myDBState;
|
|
bool finalWriteStarted;
|
|
Future<Void> previousWrite;
|
|
|
|
ReusableCoordinatedState(ServerCoordinators const& coordinators,
|
|
PromiseStream<Future<Void>> const& addActor,
|
|
UID const& dbgid)
|
|
: finalWriteStarted(false), previousWrite(Void()), cstate(coordinators), coordinators(coordinators),
|
|
addActor(addActor), dbgid(dbgid) {}
|
|
|
|
Future<Void> read() { return _read(this); }
|
|
|
|
Future<Void> write(DBCoreState newState, bool finalWrite = false) {
|
|
previousWrite = _write(this, newState, finalWrite);
|
|
return previousWrite;
|
|
}
|
|
|
|
Future<Void> move(ClusterConnectionString const& nc) { return cstate.move(nc); }
|
|
|
|
private:
|
|
MovableCoordinatedState cstate;
|
|
ServerCoordinators coordinators;
|
|
PromiseStream<Future<Void>> addActor;
|
|
Promise<Void> switchedState;
|
|
UID dbgid;
|
|
|
|
ACTOR Future<Void> _read(ReusableCoordinatedState* self) {
|
|
Value prevDBStateRaw = wait(self->cstate.read());
|
|
Future<Void> onConflict = recoveryTerminateOnConflict(
|
|
self->dbgid, self->fullyRecovered, self->cstate.onConflict(), self->switchedState.getFuture());
|
|
if (onConflict.isReady() && onConflict.isError()) {
|
|
throw onConflict.getError();
|
|
}
|
|
self->addActor.send(onConflict);
|
|
|
|
if (prevDBStateRaw.size()) {
|
|
self->prevDBState = BinaryReader::fromStringRef<DBCoreState>(prevDBStateRaw, IncludeVersion());
|
|
self->myDBState = self->prevDBState;
|
|
}
|
|
|
|
return Void();
|
|
}
|
|
|
|
ACTOR Future<Void> _write(ReusableCoordinatedState* self, DBCoreState newState, bool finalWrite) {
|
|
if (self->finalWriteStarted) {
|
|
wait(Future<Void>(Never()));
|
|
}
|
|
|
|
if (finalWrite) {
|
|
self->finalWriteStarted = true;
|
|
}
|
|
|
|
try {
|
|
wait(self->cstate.setExclusive(
|
|
BinaryWriter::toValue(newState, IncludeVersion(ProtocolVersion::withDBCoreState()))));
|
|
} catch (Error& e) {
|
|
TEST(true); // Master displaced during writeMasterState
|
|
throw;
|
|
}
|
|
|
|
self->myDBState = newState;
|
|
|
|
if (!finalWrite) {
|
|
self->switchedState.send(Void());
|
|
self->cstate = MovableCoordinatedState(self->coordinators);
|
|
Value rereadDBStateRaw = wait(self->cstate.read());
|
|
DBCoreState readState;
|
|
if (rereadDBStateRaw.size())
|
|
readState = BinaryReader::fromStringRef<DBCoreState>(rereadDBStateRaw, IncludeVersion());
|
|
|
|
if (readState != newState) {
|
|
TraceEvent("RecoveryTerminated", self->dbgid).detail("Reason", "CStateChanged");
|
|
TEST(true); // Coordinated state changed between writing and reading, recovery restarting
|
|
throw worker_removed();
|
|
}
|
|
self->switchedState = Promise<Void>();
|
|
self->addActor.send(recoveryTerminateOnConflict(
|
|
self->dbgid, self->fullyRecovered, self->cstate.onConflict(), self->switchedState.getFuture()));
|
|
} else {
|
|
self->fullyRecovered.send(Void());
|
|
}
|
|
|
|
return Void();
|
|
}
|
|
};
|
|
|
|
// Reference-counted, non-copyable bundle of state shared by the cluster recovery actors declared in this
// header (clusterRecoveryCore and cleanupRecoveryActorCollection both take a Reference<ClusterRecoveryData>).
// Member order matters: the constructor's initializer list relies on it (for example `cstate` and `cc` are
// built from the already-initialized `dbgid`).
struct ClusterRecoveryData : NonCopyable, ReferenceCounted<ClusterRecoveryData> {
	ClusterControllerData* controllerData;

	UID dbgid; // Taken from masterInterface.id() at construction.

	AsyncTrigger registrationTrigger;
	Version lastEpochEnd; // The last version in the old epoch not (to be) rolled back in this recovery
	Version recoveryTransactionVersion; // The first version in this epoch
	Optional<int64_t> versionEpoch; // The epoch which all versions are based off of
	double lastCommitTime;

	Version liveCommittedVersion; // The largest live committed version reported by commit proxies.
	bool databaseLocked;
	Optional<Value> proxyMetadataVersion;
	Version minKnownCommittedVersion;

	DatabaseConfiguration originalConfiguration;
	DatabaseConfiguration configuration;
	std::vector<Optional<Key>> primaryDcId;
	std::vector<Optional<Key>> remoteDcIds;
	bool hasConfiguration;

	ServerCoordinators coordinators;

	Reference<ILogSystem> logSystem;
	double lastVersionTime;
	LogSystemDiskQueueAdapter* txnStateLogAdapter; // Starts out null.
	IKeyValueStore* txnStateStore; // Starts out null; closed by the destructor when set.
	int64_t memoryLimit;
	std::map<Optional<Value>, int8_t> dcId_locality;
	std::vector<Tag> allTags;

	// Returns one more than the largest locality recorded in dcId_locality, or 0 when the map is empty.
	int8_t getNextLocality() {
		int8_t maxLocality = -1;
		for (const auto& entry : dcId_locality) {
			maxLocality = std::max(maxLocality, entry.second);
		}
		return maxLocality + 1;
	}

	std::vector<CommitProxyInterface> commitProxies;
	std::vector<CommitProxyInterface> provisionalCommitProxies;
	std::vector<GrvProxyInterface> grvProxies;
	std::vector<GrvProxyInterface> provisionalGrvProxies;
	std::vector<ResolverInterface> resolvers;

	std::map<UID, CommitProxyVersionReplies> lastCommitProxyVersionReplies;

	UID clusterId;
	Version initialClusterVersion = -1;
	Standalone<StringRef> dbId;

	MasterInterface masterInterface;
	LifetimeToken masterLifetime;
	const ClusterControllerFullInterface
	    clusterController; // If the cluster controller changes, this master will die, so this is immutable.

	ReusableCoordinatedState cstate;
	Promise<Void> recoveryReadyForCommits;
	Promise<Void> cstateUpdated;
	Reference<AsyncVar<ServerDBInfo> const> dbInfo;
	int64_t registrationCount; // Number of different MasterRegistrationRequests sent to clusterController

	// NOTE(review): not set by the constructor; presumably assigned by the recovery actors before use -- confirm.
	RecoveryState recoveryState;

	PromiseStream<Future<Void>> addActor;
	Reference<AsyncVar<bool>> recruitmentStalled;
	bool forceRecovery;
	bool neverCreated;
	int8_t safeLocality;
	int8_t primaryLocality;

	std::vector<WorkerInterface> backupWorkers; // Recruited backup workers from cluster controller.

	CounterCollection cc;
	Counter changeCoordinatorsRequests;
	Counter getCommitVersionRequests;
	Counter backupWorkerDoneRequests;
	Counter getLiveCommittedVersionRequests;
	Counter reportLiveCommittedVersionRequests;

	Future<Void> logger; // Holds the traceCounters() actor started by the constructor.

	Reference<EventCacheHolder> swVersionCheckedEventHolder;
	Reference<EventCacheHolder> recoveredConfigEventHolder;
	Reference<EventCacheHolder> clusterRecoveryStateEventHolder;
	Reference<EventCacheHolder> clusterRecoveryGenerationsEventHolder;
	Reference<EventCacheHolder> clusterRecoveryDurationEventHolder;
	Reference<EventCacheHolder> clusterRecoveryAvailableEventHolder;

	// Initializes every version to invalidVersion, both localities to tagLocalityInvalid, and the raw pointers
	// to null; creates the counters and event-cache holders; and starts periodic counter logging.
	// If `forceRecovery` is requested but the cluster controller has no DC id, a SevError event is logged and
	// the forceRecovery member is cleared.
	ClusterRecoveryData(ClusterControllerData* controllerData,
	                    Reference<AsyncVar<ServerDBInfo> const> const& dbInfo,
	                    MasterInterface const& masterInterface,
	                    LifetimeToken const& masterLifetimeToken,
	                    ServerCoordinators const& coordinators,
	                    ClusterControllerFullInterface const& clusterController,
	                    Standalone<StringRef> const& dbId,
	                    PromiseStream<Future<Void>> const& addActor,
	                    bool forceRecovery)

	  : controllerData(controllerData), dbgid(masterInterface.id()), lastEpochEnd(invalidVersion),
	    recoveryTransactionVersion(invalidVersion), lastCommitTime(0), liveCommittedVersion(invalidVersion),
	    databaseLocked(false), minKnownCommittedVersion(invalidVersion), hasConfiguration(false),
	    coordinators(coordinators), lastVersionTime(0), txnStateLogAdapter(nullptr), txnStateStore(nullptr),
	    memoryLimit(2e9), dbId(dbId), masterInterface(masterInterface), masterLifetime(masterLifetimeToken),
	    clusterController(clusterController), cstate(coordinators, addActor, dbgid), dbInfo(dbInfo),
	    registrationCount(0), addActor(addActor), recruitmentStalled(makeReference<AsyncVar<bool>>(false)),
	    forceRecovery(forceRecovery), neverCreated(false), safeLocality(tagLocalityInvalid),
	    primaryLocality(tagLocalityInvalid), cc("Master", dbgid.toString()),
	    changeCoordinatorsRequests("ChangeCoordinatorsRequests", cc),
	    getCommitVersionRequests("GetCommitVersionRequests", cc),
	    backupWorkerDoneRequests("BackupWorkerDoneRequests", cc),
	    getLiveCommittedVersionRequests("GetLiveCommittedVersionRequests", cc),
	    reportLiveCommittedVersionRequests("ReportLiveCommittedVersionRequests", cc),
	    swVersionCheckedEventHolder(makeReference<EventCacheHolder>("SWVersionCompatibilityChecked")),
	    recoveredConfigEventHolder(makeReference<EventCacheHolder>("RecoveredConfig")) {
		clusterRecoveryStateEventHolder = makeReference<EventCacheHolder>(
		    getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_STATE_EVENT_NAME));
		clusterRecoveryGenerationsEventHolder = makeReference<EventCacheHolder>(
		    getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_GENERATION_EVENT_NAME));
		clusterRecoveryDurationEventHolder = makeReference<EventCacheHolder>(
		    getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_DURATION_EVENT_NAME));
		clusterRecoveryAvailableEventHolder = makeReference<EventCacheHolder>(
		    getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_AVAILABLE_EVENT_NAME));
		logger = traceCounters(getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_METRICS_EVENT_NAME),
		                       dbgid,
		                       SERVER_KNOBS->WORKER_LOGGING_INTERVAL,
		                       &cc,
		                       getRecoveryEventName(ClusterRecoveryEventType::CLUSTER_RECOVERY_METRICS_EVENT_NAME));
		if (forceRecovery && !controllerData->clusterControllerDcId.present()) {
			TraceEvent(SevError, "ForcedRecoveryRequiresDcID").log();
			// The constructor parameter shadows the member of the same name, so the member must be named
			// explicitly; a plain `forceRecovery = false` would only modify the local parameter.
			this->forceRecovery = false;
		}
	}

	// Closes the transaction state store if one was attached.
	~ClusterRecoveryData() {
		if (txnStateStore) {
			txnStateStore->close();
		}
	}
};
|
|
|
|
ACTOR Future<Void> recruitNewMaster(ClusterControllerData* cluster,
|
|
ClusterControllerData::DBInfo* db,
|
|
MasterInterface* newMaster);
|
|
ACTOR Future<Void> cleanupRecoveryActorCollection(Reference<ClusterRecoveryData> self, bool exThrown);
|
|
ACTOR Future<Void> clusterRecoveryCore(Reference<ClusterRecoveryData> self);
|
|
bool isNormalClusterRecoveryError(const Error&);
|
|
|
|
#include "flow/unactorcompiler.h"
|
|
|
|
#endif
|