FastRestore: Resolve review comments
1) Do not keep restore role data (e.g., masterData) in restore worker; 2) Change function parameter list by only passing in the needed variables in role data; 3) Remove unneccessary files vector from masterData; 4) Change typos in comments and some functions name.
This commit is contained in:
parent
701676dbd2
commit
f1741aa90d
|
@ -37,7 +37,9 @@
|
|||
ACTOR static Future<Void> handleSendMutationVectorRequest(RestoreSendMutationVectorVersionedRequest req, Reference<RestoreApplierData> self);
|
||||
ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req, Reference<RestoreApplierData> self, Database cx);
|
||||
|
||||
ACTOR Future<Void> restoreApplierCore(Reference<RestoreApplierData> self, RestoreApplierInterface applierInterf, Database cx) {
|
||||
ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int nodeIndex, Database cx) {
|
||||
state Reference<RestoreApplierData> self = Reference<RestoreApplierData>( new RestoreApplierData(applierInterf.id(), nodeIndex) );
|
||||
|
||||
state ActorCollection actors(false);
|
||||
state Future<Void> exitRole = Never();
|
||||
state double lastLoopTopTime;
|
||||
|
|
|
@ -127,7 +127,7 @@ struct RestoreApplierData : RestoreRoleData, public ReferenceCounted<RestoreAppl
|
|||
};
|
||||
|
||||
|
||||
ACTOR Future<Void> restoreApplierCore(Reference<RestoreApplierData> self, RestoreApplierInterface applierInterf, Database cx);
|
||||
ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int nodeIndex, Database cx);
|
||||
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
|
|
|
@ -32,9 +32,10 @@ bool isRangeMutation(MutationRef m);
|
|||
void splitMutation(Reference<RestoreLoaderData> self, MutationRef m, Arena& mvector_arena, VectorRef<MutationRef>& mvector, Arena& nodeIDs_arena, VectorRef<UID>& nodeIDs) ;
|
||||
void _parseSerializedMutation(VersionedMutationsMap *kvOps, SerializedMutationListMap *mutationMap, bool isSampling = false);
|
||||
|
||||
ACTOR Future<Void> handleRestoreSysInfoRequest(RestoreSysInfoRequest req, Reference<RestoreLoaderData> self);
|
||||
ACTOR Future<Void> handleSetApplierKeyRangeVectorRequest(RestoreSetApplierKeyRangeVectorRequest req, Reference<RestoreLoaderData> self);
|
||||
ACTOR Future<Void> handleLoadFileRequest(RestoreLoadFileRequest req, Reference<RestoreLoaderData> self, bool isSampling = false);
|
||||
ACTOR Future<Void> registerMutationsToApplier(Reference<RestoreLoaderData> self, VersionedMutationsMap *kvOps, bool isRangeFile, Version startVersion, Version endVersion);
|
||||
ACTOR Future<Void> sendMutationsToApplier(Reference<RestoreLoaderData> self, VersionedMutationsMap *kvOps, bool isRangeFile, Version startVersion, Version endVersion);
|
||||
ACTOR static Future<Void> _parseLogFileToMutationsOnLoader(SerializedMutationListMap *mutationMap,
|
||||
std::map<Standalone<StringRef>, uint32_t> *mutationPartMap,
|
||||
Reference<IBackupContainer> bc, Version version,
|
||||
|
@ -46,7 +47,9 @@ ACTOR static Future<Void> _parseRangeFileToMutationsOnLoader(VersionedMutationsM
|
|||
std::string fileName, int64_t readOffset_input, int64_t readLen_input,KeyRange restoreRange);
|
||||
|
||||
|
||||
ACTOR Future<Void> restoreLoaderCore(Reference<RestoreLoaderData> self, RestoreLoaderInterface loaderInterf, Database cx) {
|
||||
ACTOR Future<Void> restoreLoaderCore(RestoreLoaderInterface loaderInterf, int nodeIndex, Database cx) {
|
||||
state Reference<RestoreLoaderData> self = Reference<RestoreLoaderData>( new RestoreLoaderData(loaderInterf.id(), nodeIndex) );
|
||||
|
||||
state ActorCollection actors(false);
|
||||
state Future<Void> exitRole = Never();
|
||||
state double lastLoopTopTime;
|
||||
|
@ -67,6 +70,10 @@ ACTOR Future<Void> restoreLoaderCore(Reference<RestoreLoaderData> self, RestoreL
|
|||
requestTypeStr = "heartbeat";
|
||||
actors.add(handleHeartbeat(req, loaderInterf.id()));
|
||||
}
|
||||
when ( RestoreSysInfoRequest req = waitNext(loaderInterf.updateRestoreSysInfo.getFuture()) ) {
|
||||
requestTypeStr = "updateRestoreSysInfo";
|
||||
actors.add( handleRestoreSysInfoRequest(req, self) );
|
||||
}
|
||||
when ( RestoreSetApplierKeyRangeVectorRequest req = waitNext(loaderInterf.setApplierKeyRangeVectorRequest.getFuture()) ) {
|
||||
requestTypeStr = "setApplierKeyRangeVectorRequest";
|
||||
actors.add(handleSetApplierKeyRangeVectorRequest(req, self));
|
||||
|
@ -98,6 +105,24 @@ ACTOR Future<Void> restoreLoaderCore(Reference<RestoreLoaderData> self, RestoreL
|
|||
return Void();
|
||||
}
|
||||
|
||||
// Assume: Only update the local data if it (applierInterf) has not been set
|
||||
ACTOR Future<Void> handleRestoreSysInfoRequest(RestoreSysInfoRequest req, Reference<RestoreLoaderData> self) {
|
||||
TraceEvent("FastRestore").detail("HandleRestoreSysInfoRequest", self->id());
|
||||
ASSERT(self.isValid());
|
||||
|
||||
// The loader has received the appliers interfaces
|
||||
if ( !self->appliersInterf.empty() ) {
|
||||
req.reply.send(RestoreCommonReply(self->id()));
|
||||
return Void();
|
||||
}
|
||||
|
||||
self->appliersInterf = req.sysInfo.appliers;
|
||||
|
||||
req.reply.send(RestoreCommonReply(self->id()) );
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
||||
ACTOR Future<Void> handleSetApplierKeyRangeVectorRequest(RestoreSetApplierKeyRangeVectorRequest req, Reference<RestoreLoaderData> self) {
|
||||
// Idempodent operation. OK to re-execute the duplicate cmd
|
||||
if ( self->range2Applier.empty() ) {
|
||||
|
@ -139,7 +164,7 @@ ACTOR Future<Void> _processLoadingParam(LoadingParam param, Reference<RestoreLoa
|
|||
_parseSerializedMutation(&kvOps, &mutationMap);
|
||||
}
|
||||
|
||||
wait( registerMutationsToApplier(self, &kvOps, param.isRangeFile, param.prevVersion, param.endVersion) ); // Send the parsed mutation to applier who will apply the mutation to DB
|
||||
wait( sendMutationsToApplier(self, &kvOps, param.isRangeFile, param.prevVersion, param.endVersion) ); // Send the parsed mutation to applier who will apply the mutation to DB
|
||||
|
||||
TraceEvent("FastRestore").detail("Loader", self->id()).detail("FinishLoadingFile", param.filename);
|
||||
|
||||
|
@ -160,14 +185,14 @@ ACTOR Future<Void> handleLoadFileRequest(RestoreLoadFileRequest req, Reference<R
|
|||
}
|
||||
|
||||
// TODO: This function can be revised better
|
||||
ACTOR Future<Void> registerMutationsToApplier(Reference<RestoreLoaderData> self,
|
||||
ACTOR Future<Void> sendMutationsToApplier(Reference<RestoreLoaderData> self,
|
||||
VersionedMutationsMap *pkvOps,
|
||||
bool isRangeFile, Version startVersion, Version endVersion) {
|
||||
state VersionedMutationsMap &kvOps = *pkvOps;
|
||||
state int kvCount = 0;
|
||||
state int splitMutationIndex = 0;
|
||||
|
||||
TraceEvent("FastRestore").detail("RegisterMutationToApplier", self->id()).detail("IsRangeFile", isRangeFile)
|
||||
TraceEvent("FastRestore").detail("SendMutationToApplier", self->id()).detail("IsRangeFile", isRangeFile)
|
||||
.detail("StartVersion", startVersion).detail("EndVersion", endVersion);
|
||||
|
||||
// Ensure there is a mutation request sent at endVersion, so that applier can advance its notifiedVersion
|
||||
|
@ -233,7 +258,7 @@ ACTOR Future<Void> registerMutationsToApplier(Reference<RestoreLoaderData> self,
|
|||
}
|
||||
} // Mutations at the same version
|
||||
|
||||
// Register the mutations to appliers for each version
|
||||
// Send the mutations to appliers for each version
|
||||
for (auto &applierID : applierIDs) {
|
||||
requests.push_back( std::make_pair(applierID, RestoreSendMutationVectorVersionedRequest(prevVersion, commitVersion, isRangeFile, applierMutationsBuffer[applierID])) );
|
||||
applierMutationsBuffer[applierID].pop_front(applierMutationsBuffer[applierID].size());
|
||||
|
@ -245,7 +270,7 @@ ACTOR Future<Void> registerMutationsToApplier(Reference<RestoreLoaderData> self,
|
|||
prevVersion = commitVersion;
|
||||
} // all versions of mutations
|
||||
|
||||
TraceEvent("FastRestore").detail("LoaderRegisterMutationOnAppliers", kvCount);
|
||||
TraceEvent("FastRestore").detail("LoaderSendMutationOnAppliers", kvCount);
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -433,7 +458,7 @@ ACTOR static Future<Void> _parseRangeFileToMutationsOnLoader(VersionedMutationsM
|
|||
while(rangeStart < rangeEnd && !restoreRange.contains(blockData[rangeStart].key)) {
|
||||
++rangeStart;
|
||||
}
|
||||
// Side end backwaself, stop if something at (rangeEnd-1) is found in range
|
||||
// Side end from back, stop if something at (rangeEnd-1) is found in range
|
||||
while(rangeEnd > rangeStart && !restoreRange.contains(blockData[rangeEnd - 1].key)) {
|
||||
--rangeEnd;
|
||||
}
|
||||
|
|
|
@ -62,7 +62,7 @@ struct RestoreLoaderData : RestoreRoleData, public ReferenceCounted<RestoreLoade
|
|||
role = RestoreRole::Loader;
|
||||
}
|
||||
|
||||
~RestoreLoaderData() {} = default;
|
||||
~RestoreLoaderData() = default;
|
||||
|
||||
std::string describeNode() {
|
||||
std::stringstream ss;
|
||||
|
@ -101,7 +101,7 @@ struct RestoreLoaderData : RestoreRoleData, public ReferenceCounted<RestoreLoade
|
|||
};
|
||||
|
||||
|
||||
ACTOR Future<Void> restoreLoaderCore(Reference<RestoreLoaderData> self, RestoreLoaderInterface loaderInterf, Database cx);
|
||||
ACTOR Future<Void> restoreLoaderCore(RestoreLoaderInterface loaderInterf, int nodeIndex, Database cx);
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
#endif
|
|
@ -36,10 +36,14 @@
|
|||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
ACTOR static Future<Void> _clearDB(Database cx);
|
||||
ACTOR static Future<Void> _collectBackupFiles(Reference<RestoreMasterData> self, Database cx, RestoreRequest request);
|
||||
ACTOR static Future<Void> _collectBackupFiles(Reference<IBackupContainer> bc, std::vector<RestoreFileFR>* output_files, Database cx, RestoreRequest request);
|
||||
|
||||
ACTOR static Future<Version> processRestoreRequest(RestoreRequest request, Reference<RestoreMasterData> self, Database cx);
|
||||
ACTOR static Future<Void> startProcessRestoreRequests(Reference<RestoreMasterData> self, Database cx);
|
||||
ACTOR static Future<Void> distributeWorkloadPerVersionBatch(Reference<RestoreMasterData> self, Database cx, RestoreRequest request, VersionBatch versionBatch);
|
||||
|
||||
ACTOR static Future<Void> recruitRestoreRoles(Reference<RestoreWorkerData> masterWorker, Reference<RestoreMasterData> masterData);
|
||||
ACTOR static Future<Void> distributeRestoreSysInfo(Reference<RestoreWorkerData> masterWorker, Reference<RestoreMasterData> masterData);
|
||||
|
||||
ACTOR static Future<Standalone<VectorRef<RestoreRequest>>> collectRestoreRequests(Database cx);
|
||||
ACTOR static Future<Void> initializeVersionBatch(Reference<RestoreMasterData> self);
|
||||
|
@ -49,6 +53,80 @@ ACTOR static Future<Void> notifyRestoreCompleted(Reference<RestoreMasterData> se
|
|||
|
||||
void dummySampleWorkload(Reference<RestoreMasterData> self);
|
||||
|
||||
|
||||
ACTOR Future<Void> startRestoreMaster(Reference<RestoreWorkerData> masterWorker, Database cx) {
|
||||
state Reference<RestoreMasterData> self = Reference<RestoreMasterData>(new RestoreMasterData());
|
||||
|
||||
// recruitRestoreRoles must come after masterWorker has finished collectWorkerInterface
|
||||
wait( recruitRestoreRoles(masterWorker, self) );
|
||||
|
||||
wait( distributeRestoreSysInfo(masterWorker, self) );
|
||||
|
||||
wait( startProcessRestoreRequests(self, cx) );
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
// RestoreWorker that has restore master role: Recruite a role for each worker
|
||||
ACTOR Future<Void> recruitRestoreRoles(Reference<RestoreWorkerData> masterWorker, Reference<RestoreMasterData> masterData) {
|
||||
TraceEvent("FastRestore").detail("RecruitRestoreRoles", masterWorker->workerInterfaces.size())
|
||||
.detail("NumLoaders", opConfig.num_loaders).detail("NumAppliers", opConfig.num_appliers);
|
||||
|
||||
ASSERT( masterData.isValid() );
|
||||
ASSERT( opConfig.num_loaders > 0 && opConfig.num_appliers > 0 );
|
||||
ASSERT( opConfig.num_loaders + opConfig.num_appliers <= masterWorker->workerInterfaces.size() ); // We assign 1 role per worker for now
|
||||
|
||||
// Assign a role to each worker
|
||||
state int nodeIndex = 0;
|
||||
state RestoreRole role;
|
||||
std::map<UID, RestoreRecruitRoleRequest> requests;
|
||||
for (auto &workerInterf : masterWorker->workerInterfaces) {
|
||||
if ( nodeIndex >= 0 && nodeIndex < opConfig.num_appliers ) {
|
||||
// [0, numApplier) are appliers
|
||||
role = RestoreRole::Applier;
|
||||
} else if ( nodeIndex >= opConfig.num_appliers && nodeIndex < opConfig.num_loaders + opConfig.num_appliers ) {
|
||||
// [numApplier, numApplier + numLoader) are loaders
|
||||
role = RestoreRole::Loader;
|
||||
}
|
||||
|
||||
TraceEvent("FastRestore").detail("Role", getRoleStr(role)).detail("WorkerNode", workerInterf.first);
|
||||
requests[workerInterf.first] = RestoreRecruitRoleRequest(role, nodeIndex);
|
||||
nodeIndex++;
|
||||
}
|
||||
|
||||
state std::vector<RestoreRecruitRoleReply> replies;
|
||||
wait( getBatchReplies(&RestoreWorkerInterface::recruitRole, masterWorker->workerInterfaces, requests, &replies) );
|
||||
for (auto& reply : replies) {
|
||||
if ( reply.role == RestoreRole::Applier ) {
|
||||
ASSERT_WE_THINK(reply.applier.present());
|
||||
masterData->appliersInterf[reply.applier.get().id()] = reply.applier.get();
|
||||
} else if ( reply.role == RestoreRole::Loader ) {
|
||||
ASSERT_WE_THINK(reply.loader.present());
|
||||
masterData->loadersInterf[reply.loader.get().id()] = reply.loader.get();
|
||||
} else {
|
||||
TraceEvent(SevError, "FastRestore").detail("RecruitRestoreRoles_InvalidRole", reply.role);
|
||||
}
|
||||
}
|
||||
TraceEvent("FastRestore").detail("RecruitRestoreRolesDone", masterWorker->workerInterfaces.size());
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> distributeRestoreSysInfo(Reference<RestoreWorkerData> masterWorker, Reference<RestoreMasterData> masterData) {
|
||||
ASSERT( masterData.isValid() );
|
||||
ASSERT( !masterData->loadersInterf.empty() );
|
||||
RestoreSysInfo sysInfo(masterData->appliersInterf);
|
||||
std::vector<std::pair<UID, RestoreSysInfoRequest>> requests;
|
||||
for (auto &loader : masterData->loadersInterf) {
|
||||
requests.push_back( std::make_pair(loader.first, RestoreSysInfoRequest(sysInfo)) );
|
||||
}
|
||||
|
||||
TraceEvent("FastRestore").detail("DistributeRestoreSysInfoToLoaders", masterData->loadersInterf.size());
|
||||
wait( sendBatchRequests(&RestoreLoaderInterface::updateRestoreSysInfo, masterData->loadersInterf, requests) );
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
// The server of the restore master. It drives the restore progress with the following steps:
|
||||
// 1) Lock database and clear the normal keyspace
|
||||
// 2) Wait on each RestoreRequest, which is sent by RestoreAgent operated by DBA
|
||||
|
@ -58,10 +136,8 @@ void dummySampleWorkload(Reference<RestoreMasterData> self);
|
|||
// 3.3) Construct requests of which file should be loaded by which loader, and send requests to loaders;
|
||||
// 4) After process all restore requests, finish restore by cleaning up the restore related system key
|
||||
// and ask all restore roles to quit.
|
||||
ACTOR Future<Void> startRestoreMaster(Reference<RestoreMasterData> self, Database cx) {
|
||||
state int checkNum = 0;
|
||||
ACTOR Future<Void> startProcessRestoreRequests(Reference<RestoreMasterData> self, Database cx) {
|
||||
state UID randomUID = g_random->randomUniqueID();
|
||||
|
||||
TraceEvent("FastRestore").detail("RestoreMaster", "WaitOnRestoreRequests");
|
||||
state Standalone<VectorRef<RestoreRequest>> restoreRequests = wait( collectRestoreRequests(cx) );
|
||||
|
||||
|
@ -70,20 +146,25 @@ ACTOR Future<Void> startRestoreMaster(Reference<RestoreMasterData> self, Databas
|
|||
wait( _clearDB(cx) );
|
||||
|
||||
// Step: Perform the restore requests
|
||||
for ( auto &it : restoreRequests ) {
|
||||
TraceEvent("FastRestore").detail("RestoreRequestInfo", it.toString());
|
||||
Version ver = wait( processRestoreRequest(it, self, cx) );
|
||||
state int restoreIndex = 0;
|
||||
try {
|
||||
for ( restoreIndex = 0; restoreIndex < restoreRequests.size(); restoreIndex++ ) {
|
||||
RestoreRequest& request = restoreRequests[restoreIndex];
|
||||
TraceEvent("FastRestore").detail("RestoreRequestInfo", request.toString());
|
||||
Version ver = wait( processRestoreRequest(request, self, cx) );
|
||||
}
|
||||
} catch(Error &e) {
|
||||
TraceEvent(SevError, "FastRestoreFailed").detail("RestoreRequest", restoreRequests[restoreIndex].toString());
|
||||
}
|
||||
|
||||
|
||||
// Step: Notify all restore requests have been handled by cleaning up the restore keys
|
||||
wait( notifyRestoreCompleted(self, cx) );
|
||||
|
||||
try {
|
||||
wait( unlockDatabase(cx,randomUID) );
|
||||
} catch(Error &e) {
|
||||
printf(" unlockDB fails. uid:%s\n", randomUID.toString().c_str());
|
||||
TraceEvent(SevError, "UnlockDBFailed").detail("UID", randomUID.toString());
|
||||
}
|
||||
|
||||
|
||||
TraceEvent("FastRestore").detail("RestoreMasterComplete", self->id());
|
||||
|
||||
|
@ -91,9 +172,15 @@ ACTOR Future<Void> startRestoreMaster(Reference<RestoreMasterData> self, Databas
|
|||
}
|
||||
|
||||
ACTOR static Future<Version> processRestoreRequest(RestoreRequest request, Reference<RestoreMasterData> self, Database cx) {
|
||||
wait( _collectBackupFiles(self, cx, request) );
|
||||
self->constructFilesWithVersionRange();
|
||||
self->buildVersionBatches();
|
||||
state std::vector<RestoreFileFR> files;
|
||||
state std::vector<RestoreFileFR> allFiles;
|
||||
|
||||
self->initBackupContainer(request.url);
|
||||
|
||||
wait( _collectBackupFiles(self->bc, &files, cx, request) ); // Get all backup files' description and save them to files
|
||||
self->constructFilesWithVersionRange(files, allFiles); // Assign modified files to allFiles
|
||||
self->buildVersionBatches(allFiles, self->versionBatches); // Divide files into version batches
|
||||
|
||||
state std::map<Version, VersionBatch>::iterator versionBatch;
|
||||
for (versionBatch = self->versionBatches.begin(); versionBatch != self->versionBatches.end(); versionBatch++) {
|
||||
wait( initializeVersionBatch(self) );
|
||||
|
@ -237,10 +324,10 @@ ACTOR static Future<Standalone<VectorRef<RestoreRequest>>> collectRestoreRequest
|
|||
return restoreRequests;
|
||||
}
|
||||
|
||||
// NOTE: This function can now get the backup file descriptors
|
||||
ACTOR static Future<Void> _collectBackupFiles(Reference<RestoreMasterData> self, Database cx, RestoreRequest request) {
|
||||
self->initBackupContainer(request.url);
|
||||
state BackupDescription desc = wait(self->bc->describeBackup());
|
||||
// Collect the backup files' description into output_files by reading the backupContainer bc.
|
||||
ACTOR static Future<Void> _collectBackupFiles(Reference<IBackupContainer> bc, std::vector<RestoreFileFR>* output_files, Database cx, RestoreRequest request) {
|
||||
state std::vector<RestoreFileFR> &files = *output_files;
|
||||
state BackupDescription desc = wait(bc->describeBackup());
|
||||
|
||||
// TODO: Delete this and see if it works
|
||||
wait(desc.resolveVersionTimes(cx));
|
||||
|
@ -249,27 +336,27 @@ ACTOR static Future<Void> _collectBackupFiles(Reference<RestoreMasterData> self,
|
|||
if(request.targetVersion == invalidVersion && desc.maxRestorableVersion.present())
|
||||
request.targetVersion = desc.maxRestorableVersion.get();
|
||||
|
||||
Optional<RestorableFileSet> restorable = wait(self->bc->getRestoreSet(request.targetVersion));
|
||||
Optional<RestorableFileSet> restorable = wait(bc->getRestoreSet(request.targetVersion));
|
||||
|
||||
if(!restorable.present()) {
|
||||
TraceEvent(SevWarn, "FastRestore").detail("NotRestorable", request.targetVersion);
|
||||
throw restore_missing_data();
|
||||
}
|
||||
|
||||
if (!self->files.empty()) {
|
||||
TraceEvent(SevError, "FastRestore").detail("ClearOldFiles", self->files.size());
|
||||
self->files.clear();
|
||||
if (!files.empty()) {
|
||||
TraceEvent(SevError, "FastRestore").detail("ClearOldFiles", files.size());
|
||||
files.clear();
|
||||
}
|
||||
|
||||
for(const RangeFile &f : restorable.get().ranges) {
|
||||
TraceEvent("FastRestore").detail("RangeFile", f.toString());
|
||||
RestoreFileFR file(f.version, f.fileName, true, f.blockSize, f.fileSize, f.version, f.version);
|
||||
self->files.push_back(file);
|
||||
files.push_back(file);
|
||||
}
|
||||
for(const LogFile &f : restorable.get().logs) {
|
||||
TraceEvent("FastRestore").detail("LogFile", f.toString());
|
||||
RestoreFileFR file(f.beginVersion, f.fileName, false, f.blockSize, f.fileSize, f.endVersion, f.beginVersion);
|
||||
self->files.push_back(file);
|
||||
files.push_back(file);
|
||||
}
|
||||
|
||||
return Void();
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include "fdbserver/CoordinationInterface.h"
|
||||
#include "fdbserver/RestoreUtil.h"
|
||||
#include "fdbserver/RestoreRoleCommon.actor.h"
|
||||
#include "fdbserver/RestoreWorker.actor.h"
|
||||
|
||||
#include "flow/actorcompiler.h" // has to be last include
|
||||
|
||||
|
@ -57,10 +58,6 @@ struct RestoreMasterData : RestoreRoleData, public ReferenceCounted<RestoreMast
|
|||
std::map<Standalone<KeyRef>, UID> range2Applier; // KeyRef is the inclusive lower bound of the key range the applier (UID) is responsible for
|
||||
std::map<Version, VersionBatch> versionBatches; // key is the beginVersion of the version batch
|
||||
|
||||
// Temporary variables to hold files and data to restore
|
||||
std::vector<RestoreFileFR> allFiles; // All backup files to be processed in all version batches
|
||||
std::vector<RestoreFileFR> files; // Backup files to be parsed and applied: range and log files in 1 version batch
|
||||
|
||||
int batchIndex;
|
||||
|
||||
Reference<IBackupContainer> bc; // Backup container is used to read backup files
|
||||
|
@ -81,7 +78,8 @@ struct RestoreMasterData : RestoreRoleData, public ReferenceCounted<RestoreMast
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
void buildVersionBatches() {
|
||||
// Split allFiles into multiple versionBatches based on files' version
|
||||
void buildVersionBatches(const std::vector<RestoreFileFR>& allFiles, std::map<Version, VersionBatch>& versionBatches) {
|
||||
// A version batch includes a log file
|
||||
// Because log file's verion range does not overlap, we use log file's version range as the version range of a version batch
|
||||
// Create a version batch for a log file
|
||||
|
@ -133,7 +131,8 @@ struct RestoreMasterData : RestoreRoleData, public ReferenceCounted<RestoreMast
|
|||
}
|
||||
}
|
||||
|
||||
void constructFilesWithVersionRange() {
|
||||
// Parse file's name to get beginVersion and endVersion of the file; and assign files to allFiles
|
||||
void constructFilesWithVersionRange(std::vector<RestoreFileFR> &files, std::vector<RestoreFileFR>& allFiles) {
|
||||
printf("[INFO] constructFilesWithVersionRange for num_files:%ld\n", files.size());
|
||||
allFiles.clear();
|
||||
for (int i = 0; i < files.size(); i++) {
|
||||
|
@ -176,7 +175,7 @@ struct RestoreMasterData : RestoreRoleData, public ReferenceCounted<RestoreMast
|
|||
}
|
||||
};
|
||||
|
||||
ACTOR Future<Void> startRestoreMaster(Reference<RestoreMasterData> self, Database cx);
|
||||
ACTOR Future<Void> startRestoreMaster(Reference<RestoreWorkerData> masterWorker, Database cx);
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
#endif
|
|
@ -37,7 +37,7 @@ struct RestoreWorkerData;
|
|||
// id is the id of the worker to be monitored
|
||||
// This actor is used for both restore loader and restore applier
|
||||
ACTOR Future<Void> handleHeartbeat(RestoreSimpleRequest req, UID id) {
|
||||
wait( delay(g_random->random01() + 0.01) ); // Random jitter reduces heat beat monitor's pressure
|
||||
wait( delayJittered(5.0) ); // Random jitter reduces heat beat monitor's pressure
|
||||
req.reply.send(RestoreCommonReply(id));
|
||||
|
||||
return Void();
|
||||
|
|
|
@ -35,12 +35,12 @@
|
|||
#include "flow/genericactors.actor.h"
|
||||
#include "flow/Hash3.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "fdbserver/RestoreUtil.h"
|
||||
#include "fdbserver/RestoreWorkerInterface.actor.h"
|
||||
#include "fdbserver/RestoreCommon.actor.h"
|
||||
#include "fdbserver/RestoreRoleCommon.actor.h"
|
||||
#include "fdbserver/RestoreLoader.actor.h"
|
||||
#include "fdbserver/RestoreApplier.actor.h"
|
||||
// #include "fdbserver/RestoreUtil.h"
|
||||
// #include "fdbserver/RestoreWorkerInterface.actor.h"
|
||||
// #include "fdbserver/RestoreCommon.actor.h"
|
||||
// #include "fdbserver/RestoreRoleCommon.actor.h"
|
||||
// #include "fdbserver/RestoreLoader.actor.h"
|
||||
// #include "fdbserver/RestoreApplier.actor.h"
|
||||
#include "fdbserver/RestoreMaster.actor.h"
|
||||
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
@ -61,43 +61,12 @@ ACTOR Future<Void> handlerTerminateWorkerRequest(RestoreSimpleRequest req, Refer
|
|||
ACTOR Future<Void> monitorWorkerLiveness(Reference<RestoreWorkerData> self);
|
||||
ACTOR Future<Void> handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Reference<RestoreWorkerData> self, ActorCollection *actors, Database cx);
|
||||
ACTOR Future<Void> collectRestoreWorkerInterface(Reference<RestoreWorkerData> self, Database cx, int min_num_workers = 2);
|
||||
ACTOR Future<Void> recruitRestoreRoles(Reference<RestoreWorkerData> self);
|
||||
ACTOR Future<Void> monitorleader(Reference<AsyncVar<RestoreWorkerInterface>> leader, Database cx, RestoreWorkerInterface myWorkerInterf);
|
||||
ACTOR Future<Void> startRestoreWorkerLeader(Reference<RestoreWorkerData> self, RestoreWorkerInterface workerInterf, Database cx);
|
||||
ACTOR Future<Void> handleRestoreSysInfoRequest(RestoreSysInfoRequest req, Reference<RestoreWorkerData> self);
|
||||
|
||||
template<> Tuple Codec<ERestoreState>::pack(ERestoreState const &val);
|
||||
template<> ERestoreState Codec<ERestoreState>::unpack(Tuple const &val);
|
||||
|
||||
// Each restore worker (a process) is assigned for a role.
|
||||
// MAYBE Later: We will support multiple restore roles on a worker
|
||||
struct RestoreWorkerData : NonCopyable, public ReferenceCounted<RestoreWorkerData> {
|
||||
UID workerID;
|
||||
std::map<UID, RestoreWorkerInterface> workerInterfaces; // UID is worker's node id, RestoreWorkerInterface is worker's communication workerInterface
|
||||
|
||||
// Restore Roles
|
||||
Optional<RestoreLoaderInterface> loaderInterf;
|
||||
Reference<RestoreLoaderData> loaderData;
|
||||
Optional<RestoreApplierInterface> applierInterf;
|
||||
Reference<RestoreApplierData> applierData;
|
||||
Reference<RestoreMasterData> masterData;
|
||||
|
||||
uint32_t inProgressFlag = 0; // To avoid race between duplicate message delivery that invokes the same actor multiple times
|
||||
|
||||
UID id() const { return workerID; };
|
||||
|
||||
RestoreWorkerData() = default;
|
||||
|
||||
~RestoreWorkerData() {
|
||||
printf("[Exit] Worker:%s RestoreWorkerData is deleted\n", workerID.toString().c_str());
|
||||
}
|
||||
|
||||
std::string describeNode() {
|
||||
std::stringstream ss;
|
||||
ss << "RestoreWorker workerID:" << workerID.toString();
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
// Remove the worker interface from restoreWorkerKey and remove its roles interfaces from their keys.
|
||||
ACTOR Future<Void> handlerTerminateWorkerRequest(RestoreSimpleRequest req, Reference<RestoreWorkerData> self, RestoreWorkerInterface workerInterf, Database cx) {
|
||||
|
@ -135,9 +104,8 @@ ACTOR Future<Void> handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Refer
|
|||
DUMPTOKEN(recruited.initVersionBatch);
|
||||
DUMPTOKEN(recruited.collectRestoreRoleInterfaces);
|
||||
DUMPTOKEN(recruited.finishRestore);
|
||||
self->loaderData = Reference<RestoreLoaderData>( new RestoreLoaderData(self->loaderInterf.get().id(), req.nodeIndex) );
|
||||
actors->add( restoreLoaderCore(self->loaderData, self->loaderInterf.get(), cx) );
|
||||
TraceEvent("FastRestore").detail("LoaderRecruited", self->loaderData->id());
|
||||
actors->add( restoreLoaderCore(self->loaderInterf.get(), req.nodeIndex, cx) );
|
||||
TraceEvent("FastRestore").detail("RecruitedLoaderNodeIndex", req.nodeIndex);
|
||||
req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Loader, self->loaderInterf.get()));
|
||||
} else if (req.role == RestoreRole::Applier) {
|
||||
ASSERT( !self->applierInterf.present() );
|
||||
|
@ -149,9 +117,8 @@ ACTOR Future<Void> handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Refer
|
|||
DUMPTOKEN(recruited.initVersionBatch);
|
||||
DUMPTOKEN(recruited.collectRestoreRoleInterfaces);
|
||||
DUMPTOKEN(recruited.finishRestore);
|
||||
self->applierData = Reference<RestoreApplierData>( new RestoreApplierData(self->applierInterf.get().id(), req.nodeIndex) );
|
||||
actors->add( restoreApplierCore(self->applierData, self->applierInterf.get(), cx) );
|
||||
TraceEvent("FastRestore").detail("ApplierRecruited", self->applierData->id());
|
||||
actors->add( restoreApplierCore(self->applierInterf.get(), req.nodeIndex, cx) );
|
||||
TraceEvent("FastRestore").detail("RecruitedApplierNodeIndex", req.nodeIndex);
|
||||
req.reply.send(RestoreRecruitRoleReply(self->id(), RestoreRole::Applier, self->applierInterf.get()));
|
||||
} else {
|
||||
TraceEvent(SevError, "FastRestore").detail("HandleRecruitRoleRequest", "UnknownRole"); //.detail("Request", req.printable());
|
||||
|
@ -160,26 +127,6 @@ ACTOR Future<Void> handleRecruitRoleRequest(RestoreRecruitRoleRequest req, Refer
|
|||
return Void();
|
||||
}
|
||||
|
||||
// Assume: Only update the local data if it (applierInterf) has not been set
|
||||
ACTOR Future<Void> handleRestoreSysInfoRequest(RestoreSysInfoRequest req, Reference<RestoreWorkerData> self) {
|
||||
TraceEvent("FastRestore").detail("HandleRestoreSysInfoRequest", self->id());
|
||||
// Applier does not need to know appliers interfaces
|
||||
if ( !self->loaderData.isValid() ) {
|
||||
req.reply.send(RestoreCommonReply(self->id()));
|
||||
return Void();
|
||||
}
|
||||
// The loader has received the appliers interfaces
|
||||
if ( !self->loaderData->appliersInterf.empty() ) {
|
||||
req.reply.send(RestoreCommonReply(self->id()));
|
||||
return Void();
|
||||
}
|
||||
|
||||
self->loaderData->appliersInterf = req.sysInfo.appliers;
|
||||
|
||||
req.reply.send(RestoreCommonReply(self->id()) );
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
||||
// Read restoreWorkersKeys from DB to get each restore worker's restore workerInterface and set it to self->workerInterfaces
|
||||
// This is done before we assign restore roles for restore workers
|
||||
|
@ -242,69 +189,8 @@ void initRestoreWorkerConfig() {
|
|||
.detail("TxnBatchSize", opConfig.transactionBatchSizeThreshold);
|
||||
}
|
||||
|
||||
// RestoreWorker that has restore master role: Recruite a role for each worker
|
||||
ACTOR Future<Void> recruitRestoreRoles(Reference<RestoreWorkerData> self) {
|
||||
TraceEvent("FastRestore").detail("RecruitRestoreRoles", self->workerInterfaces.size())
|
||||
.detail("NumLoaders", opConfig.num_loaders).detail("NumAppliers", opConfig.num_appliers);
|
||||
|
||||
ASSERT( self->masterData.isValid() );
|
||||
ASSERT( opConfig.num_loaders > 0 && opConfig.num_appliers > 0 );
|
||||
ASSERT( opConfig.num_loaders + opConfig.num_appliers <= self->workerInterfaces.size() ); // We assign 1 role per worker for now
|
||||
|
||||
// Assign a role to each worker
|
||||
state int nodeIndex = 0;
|
||||
state RestoreRole role;
|
||||
std::map<UID, RestoreRecruitRoleRequest> requests;
|
||||
for (auto &workerInterf : self->workerInterfaces) {
|
||||
if ( nodeIndex >= 0 && nodeIndex < opConfig.num_appliers ) {
|
||||
// [0, numApplier) are appliers
|
||||
role = RestoreRole::Applier;
|
||||
} else if ( nodeIndex >= opConfig.num_appliers && nodeIndex < opConfig.num_loaders + opConfig.num_appliers ) {
|
||||
// [numApplier, numApplier + numLoader) are loaders
|
||||
role = RestoreRole::Loader;
|
||||
}
|
||||
|
||||
TraceEvent("FastRestore").detail("Role", getRoleStr(role)).detail("WorkerNode", workerInterf.first);
|
||||
requests[workerInterf.first] = RestoreRecruitRoleRequest(role, nodeIndex);
|
||||
nodeIndex++;
|
||||
}
|
||||
|
||||
state std::vector<RestoreRecruitRoleReply> replies;
|
||||
wait( getBatchReplies(&RestoreWorkerInterface::recruitRole, self->workerInterfaces, requests, &replies) );
|
||||
for (auto& reply : replies) {
|
||||
if ( reply.role == RestoreRole::Applier ) {
|
||||
ASSERT_WE_THINK(reply.applier.present());
|
||||
self->masterData->appliersInterf[reply.applier.get().id()] = reply.applier.get();
|
||||
} else if ( reply.role == RestoreRole::Loader ) {
|
||||
ASSERT_WE_THINK(reply.loader.present());
|
||||
self->masterData->loadersInterf[reply.loader.get().id()] = reply.loader.get();
|
||||
} else {
|
||||
TraceEvent(SevError, "FastRestore").detail("RecruitRestoreRoles_InvalidRole", reply.role);
|
||||
}
|
||||
}
|
||||
TraceEvent("FastRestore").detail("RecruitRestoreRolesDone", self->workerInterfaces.size());
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> distributeRestoreSysInfo(Reference<RestoreWorkerData> self) {
|
||||
ASSERT( self->masterData.isValid() );
|
||||
ASSERT( !self->masterData->loadersInterf.empty() );
|
||||
RestoreSysInfo sysInfo(self->masterData->appliersInterf);
|
||||
std::vector<std::pair<UID, RestoreSysInfoRequest>> requests;
|
||||
for (auto &worker : self->workerInterfaces) {
|
||||
requests.push_back( std::make_pair(worker.first, RestoreSysInfoRequest(sysInfo)) );
|
||||
}
|
||||
|
||||
TraceEvent("FastRestore").detail("DistributeRestoreSysInfo", self->workerInterfaces.size());
|
||||
wait( sendBatchRequests(&RestoreWorkerInterface::updateRestoreSysInfo, self->workerInterfaces, requests) );
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
// RestoreWorkerLeader is the worker that runs RestoreMaster role
|
||||
ACTOR Future<Void> startRestoreWorkerLeader(Reference<RestoreWorkerData> self, RestoreWorkerInterface workerInterf, Database cx) {
|
||||
self->masterData = Reference<RestoreMasterData>(new RestoreMasterData());
|
||||
// We must wait for enough time to make sure all restore workers have registered their workerInterfaces into the DB
|
||||
printf("[INFO][Master] NodeID:%s Restore master waits for agents to register their workerKeys\n",
|
||||
workerInterf.id().toString().c_str());
|
||||
|
@ -316,12 +202,7 @@ ACTOR Future<Void> startRestoreWorkerLeader(Reference<RestoreWorkerData> self, R
|
|||
// TODO: Needs to keep this monitor's future. May use actorCollection
|
||||
state Future<Void> workersFailureMonitor = monitorWorkerLiveness(self);
|
||||
|
||||
// recruitRestoreRoles must be after collectWorkerInterface
|
||||
wait( recruitRestoreRoles(self) );
|
||||
|
||||
wait( distributeRestoreSysInfo(self) );
|
||||
|
||||
wait( startRestoreMaster(self->masterData, cx) );
|
||||
wait( startRestoreMaster(self, cx) );
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
@ -351,10 +232,6 @@ ACTOR Future<Void> startRestoreWorker(Reference<RestoreWorkerData> self, Restore
|
|||
requestTypeStr = "recruitRole";
|
||||
actors.add( handleRecruitRoleRequest(req, self, &actors, cx) );
|
||||
}
|
||||
when ( RestoreSysInfoRequest req = waitNext(interf.updateRestoreSysInfo.getFuture()) ) {
|
||||
requestTypeStr = "updateRestoreSysInfo";
|
||||
actors.add( handleRestoreSysInfoRequest(req, self) );
|
||||
}
|
||||
when ( RestoreSimpleRequest req = waitNext(interf.terminateWorker.getFuture()) ) {
|
||||
// Destroy the worker at the end of the restore
|
||||
requestTypeStr = "terminateWorker";
|
||||
|
|
|
@ -57,11 +57,14 @@ struct RestoreSysInfo;
|
|||
struct RestoreApplierInterface;
|
||||
|
||||
|
||||
// RestoreSysInfo includes information each (type of) restore roles should know.
|
||||
// At this moment, it only include appliers. We keep the name for future extension.
|
||||
// TODO: If it turns out this struct only has appliers in the final version, we will rename it to a more specific name, e.g., AppliersMap
|
||||
struct RestoreSysInfo {
|
||||
std::map<UID, RestoreApplierInterface> appliers;
|
||||
|
||||
RestoreSysInfo() = default;
|
||||
explicit RestoreSysInfo(std::map<UID, RestoreApplierInterface> appliers) : appliers(appliers) {}
|
||||
explicit RestoreSysInfo(const std::map<UID, RestoreApplierInterface> appliers) : appliers(appliers) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
|
@ -74,7 +77,6 @@ struct RestoreWorkerInterface {
|
|||
|
||||
RequestStream<RestoreSimpleRequest> heartbeat;
|
||||
RequestStream<RestoreRecruitRoleRequest> recruitRole;
|
||||
RequestStream<RestoreSysInfoRequest> updateRestoreSysInfo;
|
||||
RequestStream<RestoreSimpleRequest> terminateWorker;
|
||||
|
||||
bool operator == (RestoreWorkerInterface const& r) const { return id() == r.id(); }
|
||||
|
@ -87,7 +89,6 @@ struct RestoreWorkerInterface {
|
|||
void initEndpoints() {
|
||||
heartbeat.getEndpoint( TaskClusterController );
|
||||
recruitRole.getEndpoint( TaskClusterController );// Q: Why do we need this?
|
||||
updateRestoreSysInfo.getEndpoint(TaskClusterController);
|
||||
terminateWorker.getEndpoint( TaskClusterController );
|
||||
|
||||
interfID = g_random->randomUniqueID();
|
||||
|
@ -95,7 +96,7 @@ struct RestoreWorkerInterface {
|
|||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, interfID, heartbeat, updateRestoreSysInfo, recruitRole, terminateWorker);
|
||||
serializer(ar, interfID, heartbeat, recruitRole, terminateWorker);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -125,6 +126,7 @@ struct RestoreRoleInterface {
|
|||
|
||||
struct RestoreLoaderInterface : RestoreRoleInterface {
|
||||
RequestStream<RestoreSimpleRequest> heartbeat;
|
||||
RequestStream<RestoreSysInfoRequest> updateRestoreSysInfo;
|
||||
RequestStream<RestoreSetApplierKeyRangeVectorRequest> setApplierKeyRangeVectorRequest;
|
||||
RequestStream<RestoreLoadFileRequest> loadFile;
|
||||
RequestStream<RestoreVersionBatchRequest> initVersionBatch;
|
||||
|
@ -143,6 +145,7 @@ struct RestoreLoaderInterface : RestoreRoleInterface {
|
|||
|
||||
void initEndpoints() {
|
||||
heartbeat.getEndpoint( TaskClusterController );
|
||||
updateRestoreSysInfo.getEndpoint( TaskClusterController );
|
||||
setApplierKeyRangeVectorRequest.getEndpoint( TaskClusterController );
|
||||
loadFile.getEndpoint( TaskClusterController );
|
||||
initVersionBatch.getEndpoint( TaskClusterController );
|
||||
|
@ -152,7 +155,7 @@ struct RestoreLoaderInterface : RestoreRoleInterface {
|
|||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, * (RestoreRoleInterface*) this, heartbeat,
|
||||
serializer(ar, * (RestoreRoleInterface*) this, heartbeat, updateRestoreSysInfo,
|
||||
setApplierKeyRangeVectorRequest, loadFile,
|
||||
initVersionBatch, collectRestoreRoleInterfaces, finishRestore);
|
||||
}
|
||||
|
|
|
@ -206,7 +206,10 @@
|
|||
<ActorCompiler Include="RestoreRoleCommon.actor.h">
|
||||
<EnableCompile>false</EnableCompile>
|
||||
</ActorCompiler>
|
||||
<ActorCompiler Include="RestoreMaster.actor.h">
|
||||
<ActorCompiler Include="RestoreWorker.actor.h">
|
||||
<EnableCompile>false</EnableCompile>
|
||||
</ActorCompiler>
|
||||
<ActorCompiler Include="RestoreMaster.actor.h">
|
||||
<EnableCompile>false</EnableCompile>
|
||||
</ActorCompiler>
|
||||
<ActorCompiler Include="RestoreLoader.actor.h">
|
||||
|
|
Loading…
Reference in New Issue