2019-05-07 07:56:49 +08:00
|
|
|
/*
|
|
|
|
* RestoreLoader.actor.cpp
|
|
|
|
*
|
|
|
|
* This source file is part of the FoundationDB open source project
|
|
|
|
*
|
|
|
|
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-05-13 12:53:09 +08:00
|
|
|
// This file implements the functions and actors used by the RestoreLoader role.
|
|
|
|
// The RestoreLoader role starts with the restoreLoaderCore actor
|
|
|
|
|
2019-05-10 11:55:44 +08:00
|
|
|
#include "fdbclient/BackupContainer.h"
|
|
|
|
#include "fdbserver/RestoreLoader.actor.h"
|
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
// SerializedMutationListMap:
|
|
|
|
// Key is the signature/version of the mutation list, Value is the mutation list (or part of the mutation list)
|
2019-10-18 01:12:15 +08:00
|
|
|
// Maps the signature/version of a mutation list to the serialized mutation list (or to a part of it).
using SerializedMutationListMap = std::map<Standalone<StringRef>, Standalone<StringRef>>;

// Uses the same key as SerializedMutationListMap; the value is the part number of the split mutation list.
using SerializedMutationPartMap = std::map<Standalone<StringRef>, uint32_t>;
|
2019-06-01 02:09:31 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
void splitMutation(Reference<RestoreLoaderData> self, MutationRef m, Arena& mvector_arena,
|
|
|
|
VectorRef<MutationRef>& mvector, Arena& nodeIDs_arena, VectorRef<UID>& nodeIDs);
|
2019-11-21 13:04:18 +08:00
|
|
|
void _parseSerializedMutation(std::map<LoadingParam, VersionedMutationsMap>::iterator kvOpsIter,
|
2019-12-03 06:33:31 +08:00
|
|
|
SerializedMutationListMap* mutationMap,
|
2019-12-23 09:16:40 +08:00
|
|
|
std::map<LoadingParam, MutationsVec>::iterator samplesIter, const RestoreAsset& asset);
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-10-25 03:47:51 +08:00
|
|
|
void handleRestoreSysInfoRequest(const RestoreSysInfoRequest& req, Reference<RestoreLoaderData> self);
|
2019-12-20 08:50:39 +08:00
|
|
|
ACTOR Future<Void> handleLoadFileRequest(RestoreLoadFileRequest req, Reference<RestoreLoaderData> self);
|
2019-11-13 08:28:09 +08:00
|
|
|
ACTOR Future<Void> handleSendMutationsRequest(RestoreSendMutationsToAppliersRequest req,
|
|
|
|
Reference<RestoreLoaderData> self);
|
2019-08-02 08:00:13 +08:00
|
|
|
ACTOR Future<Void> sendMutationsToApplier(Reference<RestoreLoaderData> self, VersionedMutationsMap* kvOps,
|
2019-12-21 14:00:36 +08:00
|
|
|
bool isRangeFile, Version startVersion, Version endVersion,
|
|
|
|
RestoreAsset asset);
|
2019-12-11 09:22:51 +08:00
|
|
|
ACTOR static Future<Void> _parseLogFileToMutationsOnLoader(NotifiedVersion* pProcessedFileOffset,
|
|
|
|
SerializedMutationListMap* mutationMap,
|
|
|
|
SerializedMutationPartMap* mutationPartMap,
|
2019-12-20 08:50:39 +08:00
|
|
|
Reference<IBackupContainer> bc, RestoreAsset asset);
|
2019-11-21 13:04:18 +08:00
|
|
|
ACTOR static Future<Void> _parseRangeFileToMutationsOnLoader(
|
2019-12-03 06:33:31 +08:00
|
|
|
std::map<LoadingParam, VersionedMutationsMap>::iterator kvOpsIter,
|
2019-12-05 03:22:44 +08:00
|
|
|
std::map<LoadingParam, MutationsVec>::iterator samplesIter, Reference<IBackupContainer> bc, Version version,
|
2019-12-20 08:50:39 +08:00
|
|
|
RestoreAsset asset);
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-07-25 07:59:05 +08:00
|
|
|
// Entry point of the RestoreLoader role.
// Creates the loader's RestoreLoaderData and then serves the requests arriving on loaderInterf.
// The loop ends after a finishRestore request has been handled, or as soon as an error is caught
// (the error is traced as a warning, not rethrown). In both cases the actor returns Void().
// The cx parameter is not used by this function.
ACTOR Future<Void> restoreLoaderCore(RestoreLoaderInterface loaderInterf, int nodeIndex, Database cx) {
	state Reference<RestoreLoaderData> self =
	    Reference<RestoreLoaderData>(new RestoreLoaderData(loaderInterf.id(), nodeIndex));

	state ActorCollection handlers(false); // collects the request handlers started below
	state Future<Void> roleFinished = Never(); // becomes ready once finishRestore has been handled

	loop {
		// Tracks which request is being served so that the catch block can report it.
		state std::string activeRequest = "[Init]";

		try {
			choose {
				when(RestoreSimpleRequest req = waitNext(loaderInterf.heartbeat.getFuture())) {
					activeRequest = "heartbeat";
					handlers.add(handleHeartbeat(req, loaderInterf.id()));
				}
				when(RestoreSysInfoRequest req = waitNext(loaderInterf.updateRestoreSysInfo.getFuture())) {
					activeRequest = "updateRestoreSysInfo";
					handleRestoreSysInfoRequest(req, self);
				}
				when(RestoreLoadFileRequest req = waitNext(loaderInterf.loadFile.getFuture())) {
					activeRequest = "loadFile";
					self->initBackupContainer(req.param.url);
					handlers.add(handleLoadFileRequest(req, self));
				}
				when(RestoreSendMutationsToAppliersRequest req = waitNext(loaderInterf.sendMutations.getFuture())) {
					activeRequest = "sendMutations";
					handlers.add(handleSendMutationsRequest(req, self));
				}
				when(RestoreVersionBatchRequest req = waitNext(loaderInterf.initVersionBatch.getFuture())) {
					activeRequest = "initVersionBatch";
					// Unlike the other handlers, this one is awaited before the next request is served.
					wait(handleInitVersionBatchRequest(req, self));
				}
				when(RestoreVersionBatchRequest req = waitNext(loaderInterf.finishRestore.getFuture())) {
					activeRequest = "finishRestore";
					handleFinishRestoreRequest(req, self);
					// Makes the roleFinished branch below eligible on the next pass through the loop.
					roleFinished = Void();
				}
				when(wait(roleFinished)) {
					TraceEvent("FastRestore").detail("RestoreLoaderCore", "ExitRole").detail("NodeID", self->id());
					break;
				}
			}
		} catch (Error& e) {
			TraceEvent(SevWarn, "FastRestore")
			    .detail("RestoreLoaderError", e.what())
			    .detail("RequestType", activeRequest);
			break;
		}
	}

	return Void();
}
|
|
|
|
|
2019-07-25 07:59:05 +08:00
|
|
|
// Assume: Only update the local data if it (applierInterf) has not been set
|
2019-10-25 03:47:51 +08:00
|
|
|
void handleRestoreSysInfoRequest(const RestoreSysInfoRequest& req, Reference<RestoreLoaderData> self) {
|
2019-07-25 07:59:05 +08:00
|
|
|
TraceEvent("FastRestore").detail("HandleRestoreSysInfoRequest", self->id());
|
|
|
|
ASSERT(self.isValid());
|
2019-08-02 08:00:13 +08:00
|
|
|
|
2019-07-25 07:59:05 +08:00
|
|
|
// The loader has received the appliers interfaces
|
2019-08-02 08:00:13 +08:00
|
|
|
if (!self->appliersInterf.empty()) {
|
2019-07-25 07:59:05 +08:00
|
|
|
req.reply.send(RestoreCommonReply(self->id()));
|
2019-10-24 06:05:03 +08:00
|
|
|
return;
|
2019-07-25 07:59:05 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
self->appliersInterf = req.sysInfo.appliers;
|
2019-08-02 08:00:13 +08:00
|
|
|
|
|
|
|
req.reply.send(RestoreCommonReply(self->id()));
|
2019-07-25 07:59:05 +08:00
|
|
|
}
|
|
|
|
|
2019-05-28 09:39:30 +08:00
|
|
|
// Parses the file described by param into versioned mutations stored in self->kvOpsPerLP[param], and
// collects sampled mutations in self->sampleMutations[param].
// The file is parsed block by block (param.blockSize bytes per block); all block parsers are started
// first and then awaited together. For a log file the parsers only concatenate the serialized
// mutation lists, which are decoded afterwards by _parseSerializedMutation.
// Requires that param has not been processed before (asserted on self->kvOpsPerLP).
ACTOR Future<Void> _processLoadingParam(LoadingParam param, Reference<RestoreLoaderData> self) {
	// Temporary data structures for parsing log files into (version, <K, V, mutationType>).
	// Standalone must be used to hold the mutations; otherwise the MutationRef memory gets corrupted.
	// mutationMap: key is the unique identifier for a batch of mutation logs at the same version.
	state SerializedMutationListMap mutationMap;
	state SerializedMutationPartMap mutationPartMap; // Sanity check the data parsing is correct
	state NotifiedVersion processedFileOffset(0);
	state std::vector<Future<Void>> fileParserFutures;
	state std::map<LoadingParam, VersionedMutationsMap>::iterator kvOpsPerLPIter = self->kvOpsPerLP.end();
	state std::map<LoadingParam, MutationsVec>::iterator samplesIter = self->sampleMutations.end();

	// Q: How to record the param's fields inside LoadingParam Refer to storageMetrics
	TraceEvent("FastRestore").detail("Loader", self->id()).detail("StartProcessLoadParam", param.toString());
	ASSERT(param.blockSize > 0);
	ASSERT(param.asset.offset % param.blockSize == 0); // Parsing must start at a block boundary.
	ASSERT(self->kvOpsPerLP.find(param) == self->kvOpsPerLP.end());

	// NOTE: a map's iterators are guaranteed to be stable, so the parsers are handed iterators
	// to the freshly created entries.
	kvOpsPerLPIter = self->kvOpsPerLP.emplace(param, VersionedMutationsMap()).first;
	samplesIter = self->sampleMutations.emplace(param, MutationsVec()).first;

	// Start one parser per block of the file.
	for (int64_t blockOffset = param.asset.offset; blockOffset < param.asset.len;
	     blockOffset += param.blockSize) {
		RestoreAsset blockAsset = param.asset;
		blockAsset.offset = blockOffset;
		// The last block may be shorter than blockSize.
		blockAsset.len = std::min<int64_t>(param.blockSize, param.asset.len - blockOffset);

		if (!param.isRangeFile) {
			// TODO: Sanity check the log file's range is overlapped with the restored version range
			fileParserFutures.push_back(_parseLogFileToMutationsOnLoader(&processedFileOffset, &mutationMap,
			                                                             &mutationPartMap, self->bc, blockAsset));
		} else {
			fileParserFutures.push_back(_parseRangeFileToMutationsOnLoader(kvOpsPerLPIter, samplesIter, self->bc,
			                                                               param.endVersion, blockAsset));
		}
	}
	wait(waitForAll(fileParserFutures));

	// Log files are decoded only after every block has been concatenated into mutationMap.
	if (!param.isRangeFile) {
		_parseSerializedMutation(kvOpsPerLPIter, &mutationMap, samplesIter, param.asset);
	}

	TraceEvent("FastRestore").detail("Loader", self->id()).detail("FinishLoadingFile", param.asset.filename);

	return Void();
}
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-10-17 11:30:11 +08:00
|
|
|
// A loader can process multiple RestoreLoadFileRequest in parallel.
|
2019-12-20 08:50:39 +08:00
|
|
|
// Handles one RestoreLoadFileRequest.
// The first request seen for a given req.param starts _processLoadingParam and records its future in
// self->processedFileParams; any later request for the same param waits on that recorded future.
// Once processing has finished, replies with the mutations sampled for the param.
ACTOR Future<Void> handleLoadFileRequest(RestoreLoadFileRequest req, Reference<RestoreLoaderData> self) {
	bool alreadyStarted = self->processedFileParams.find(req.param) != self->processedFileParams.end();

	if (alreadyStarted) {
		TraceEvent("FastRestore").detail("Loader", self->id()).detail("WaitOnProcessLoadParam", req.param.toString());
	} else {
		TraceEvent("FastRestore").detail("Loader", self->id()).detail("ProcessLoadParam", req.param.toString());
		ASSERT(self->sampleMutations.find(req.param) == self->sampleMutations.end());
		// Create the entry first, then replace the placeholder with the real processing future.
		self->processedFileParams[req.param] = Never();
		self->processedFileParams[req.param] = _processLoadingParam(req.param, self);
	}

	ASSERT(self->processedFileParams.find(req.param) != self->processedFileParams.end());
	wait(self->processedFileParams[req.param]); // wait on the processing of the req.param.

	req.reply.send(RestoreLoadFileReply(req.param, self->sampleMutations[req.param]));
	// TODO: clear self->sampleMutations[req.param] memory to save memory on loader
	return Void();
}
|
|
|
|
|
|
|
|
// Handles a RestoreSendMutationsToAppliersRequest.
// Adopts the key-range-to-applier mapping carried by the request, then walks every parsed file in
// self->kvOpsPerLP and, for the files whose kind (range or log) matches req.useRangeFile, sends their
// mutations to the appliers one file at a time. Replies once all matching files have been sent.
ACTOR Future<Void> handleSendMutationsRequest(RestoreSendMutationsToAppliersRequest req,
                                              Reference<RestoreLoaderData> self) {
	state std::map<LoadingParam, VersionedMutationsMap>::iterator fileIter = self->kvOpsPerLP.begin();

	self->rangeToApplier = req.rangeToApplier;

	while (fileIter != self->kvOpsPerLP.end()) {
		if (fileIter->first.isRangeFile == req.useRangeFile) {
			// Send the parsed mutation to applier who will apply the mutation to DB
			wait(sendMutationsToApplier(self, &fileIter->second, fileIter->first.isRangeFile,
			                            fileIter->first.prevVersion, fileIter->first.endVersion,
			                            fileIter->first.asset));
		}
		++fileIter;
	}

	req.reply.send(RestoreCommonReply(self->id()));
	return Void();
}
|
|
|
|
|
2019-06-01 02:09:31 +08:00
|
|
|
// TODO: This function can be revised better
|
2019-10-17 11:30:11 +08:00
|
|
|
// Assume: kvOps data are from the same file.
|
2019-08-02 08:00:13 +08:00
|
|
|
ACTOR Future<Void> sendMutationsToApplier(Reference<RestoreLoaderData> self, VersionedMutationsMap* pkvOps,
|
2019-12-20 08:50:39 +08:00
|
|
|
bool isRangeFile, Version startVersion, Version endVersion,
|
|
|
|
RestoreAsset asset) {
|
2019-08-02 08:00:13 +08:00
|
|
|
state VersionedMutationsMap& kvOps = *pkvOps;
|
2019-12-12 23:44:57 +08:00
|
|
|
state VersionedMutationsMap::iterator kvOp = kvOps.begin();
|
2019-05-23 04:30:33 +08:00
|
|
|
state int kvCount = 0;
|
|
|
|
state int splitMutationIndex = 0;
|
2019-12-04 04:58:11 +08:00
|
|
|
state std::vector<UID> applierIDs = self->getWorkingApplierIDs();
|
2019-12-11 09:22:51 +08:00
|
|
|
state std::vector<std::pair<UID, RestoreSendVersionedMutationsRequest>> requests;
|
2019-12-04 04:58:11 +08:00
|
|
|
state Version prevVersion = startVersion;
|
2019-05-23 04:30:33 +08:00
|
|
|
|
2019-12-04 04:58:11 +08:00
|
|
|
TraceEvent("FastRestore_SendMutationToApplier")
|
|
|
|
.detail("Loader", self->id())
|
2019-08-02 08:00:13 +08:00
|
|
|
.detail("IsRangeFile", isRangeFile)
|
|
|
|
.detail("StartVersion", startVersion)
|
2019-10-17 11:30:11 +08:00
|
|
|
.detail("EndVersion", endVersion)
|
2019-12-20 08:50:39 +08:00
|
|
|
.detail("FileIndex", asset.filename);
|
2019-06-01 02:09:31 +08:00
|
|
|
|
2019-05-23 04:30:33 +08:00
|
|
|
// Ensure there is a mutation request sent at endVersion, so that applier can advance its notifiedVersion
|
2019-08-02 08:00:13 +08:00
|
|
|
if (kvOps.find(endVersion) == kvOps.end()) {
|
2019-12-07 14:00:40 +08:00
|
|
|
kvOps[endVersion] = MutationsVec(); // Empty mutation vector will be handled by applier
|
2019-05-23 04:30:33 +08:00
|
|
|
}
|
|
|
|
|
2019-06-01 02:09:31 +08:00
|
|
|
splitMutationIndex = 0;
|
|
|
|
kvCount = 0;
|
2019-08-02 08:00:13 +08:00
|
|
|
|
|
|
|
for (kvOp = kvOps.begin(); kvOp != kvOps.end(); kvOp++) {
|
2019-12-11 14:55:40 +08:00
|
|
|
// applierMutationsBuffer is the mutation vector to be sent to each applier
|
|
|
|
// applierMutationsSize is buffered mutation vector size for each applier
|
|
|
|
std::map<UID, MutationsVec> applierMutationsBuffer;
|
|
|
|
std::map<UID, double> applierMutationsSize;
|
2019-08-02 08:00:13 +08:00
|
|
|
for (auto& applierID : applierIDs) {
|
2019-12-07 14:00:40 +08:00
|
|
|
applierMutationsBuffer[applierID] = MutationsVec();
|
2019-06-01 02:09:31 +08:00
|
|
|
applierMutationsSize[applierID] = 0.0;
|
|
|
|
}
|
2019-12-11 14:55:40 +08:00
|
|
|
Version commitVersion = kvOp->first;
|
2019-12-05 12:39:58 +08:00
|
|
|
|
|
|
|
for (int mIndex = 0; mIndex < kvOp->second.size(); mIndex++) {
|
|
|
|
MutationRef kvm = kvOp->second[mIndex];
|
2019-06-01 02:09:31 +08:00
|
|
|
// Send the mutation to applier
|
2019-08-02 08:00:13 +08:00
|
|
|
if (isRangeMutation(kvm)) {
|
2019-12-07 15:16:49 +08:00
|
|
|
MutationsVec mvector;
|
|
|
|
Standalone<VectorRef<UID>> nodeIDs;
|
2019-06-01 02:09:31 +08:00
|
|
|
// Because using a vector of mutations causes overhead, and the range mutation should happen rarely;
|
|
|
|
// We handle the range mutation and key mutation differently for the benefit of avoiding memory copy
|
|
|
|
// WARNING: The splitMutation() may have bugs
|
|
|
|
splitMutation(self, kvm, mvector.arena(), mvector.contents(), nodeIDs.arena(), nodeIDs.contents());
|
|
|
|
ASSERT(mvector.size() == nodeIDs.size());
|
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
for (splitMutationIndex = 0; splitMutationIndex < mvector.size(); splitMutationIndex++) {
|
2019-06-01 02:09:31 +08:00
|
|
|
MutationRef mutation = mvector[splitMutationIndex];
|
|
|
|
UID applierID = nodeIDs[splitMutationIndex];
|
2019-08-02 08:00:13 +08:00
|
|
|
// printf("SPLITTED MUTATION: %d: mutation:%s applierID:%s\n", splitMutationIndex,
|
|
|
|
// mutation.toString().c_str(), applierID.toString().c_str());
|
|
|
|
applierMutationsBuffer[applierID].push_back_deep(applierMutationsBuffer[applierID].arena(), mutation);
|
2019-06-01 02:09:31 +08:00
|
|
|
applierMutationsSize[applierID] += mutation.expectedSize();
|
|
|
|
|
|
|
|
kvCount++;
|
2019-05-23 04:30:33 +08:00
|
|
|
}
|
2019-06-01 02:09:31 +08:00
|
|
|
} else { // mutation operates on a particular key
|
2019-12-04 04:58:11 +08:00
|
|
|
std::map<Key, UID>::iterator itlow = self->rangeToApplier.upper_bound(kvm.param1);
|
2019-06-05 02:40:23 +08:00
|
|
|
--itlow; // make sure itlow->first <= m.param1
|
2019-08-02 08:00:13 +08:00
|
|
|
ASSERT(itlow->first <= kvm.param1);
|
2019-06-01 02:09:31 +08:00
|
|
|
MutationRef mutation = kvm;
|
|
|
|
UID applierID = itlow->second;
|
2019-08-02 08:00:13 +08:00
|
|
|
// printf("KV--Applier: K:%s ApplierID:%s\n", kvm.param1.toString().c_str(),
|
|
|
|
// applierID.toString().c_str());
|
2019-06-01 02:09:31 +08:00
|
|
|
kvCount++;
|
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
applierMutationsBuffer[applierID].push_back_deep(applierMutationsBuffer[applierID].arena(), mutation);
|
2019-06-01 02:09:31 +08:00
|
|
|
applierMutationsSize[applierID] += mutation.expectedSize();
|
|
|
|
}
|
|
|
|
} // Mutations at the same version
|
2019-05-23 04:30:33 +08:00
|
|
|
|
2019-07-25 07:59:05 +08:00
|
|
|
// Send the mutations to appliers for each version
|
2019-08-02 08:00:13 +08:00
|
|
|
for (auto& applierID : applierIDs) {
|
|
|
|
requests.push_back(std::make_pair(
|
2019-12-20 08:50:39 +08:00
|
|
|
applierID, RestoreSendVersionedMutationsRequest(asset, prevVersion, commitVersion, isRangeFile,
|
2019-12-11 09:22:51 +08:00
|
|
|
applierMutationsBuffer[applierID])));
|
2019-05-23 04:30:33 +08:00
|
|
|
}
|
2019-10-17 11:30:11 +08:00
|
|
|
TraceEvent(SevDebug, "FastRestore_Debug")
|
|
|
|
.detail("Loader", self->id())
|
|
|
|
.detail("PrevVersion", prevVersion)
|
|
|
|
.detail("CommitVersion", commitVersion)
|
2019-12-23 09:16:40 +08:00
|
|
|
.detail("Filename", asset.filename);
|
2019-10-17 11:30:11 +08:00
|
|
|
ASSERT(prevVersion < commitVersion);
|
2019-12-11 14:55:40 +08:00
|
|
|
prevVersion = commitVersion;
|
2019-08-02 08:00:13 +08:00
|
|
|
wait(sendBatchRequests(&RestoreApplierInterface::sendMutationVector, self->appliersInterf, requests));
|
2019-12-12 23:44:57 +08:00
|
|
|
|
2019-06-01 02:09:31 +08:00
|
|
|
requests.clear();
|
2019-12-12 23:44:57 +08:00
|
|
|
|
2019-10-17 11:30:11 +08:00
|
|
|
} // all versions of mutations in the same file
|
2019-05-23 04:30:33 +08:00
|
|
|
|
2019-07-25 07:59:05 +08:00
|
|
|
TraceEvent("FastRestore").detail("LoaderSendMutationOnAppliers", kvCount);
|
2019-05-23 04:30:33 +08:00
|
|
|
return Void();
|
|
|
|
}
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-05-13 13:05:49 +08:00
|
|
|
// TODO: Add a unit test for this function
|
2019-08-02 08:00:13 +08:00
|
|
|
void splitMutation(Reference<RestoreLoaderData> self, MutationRef m, Arena& mvector_arena,
|
|
|
|
VectorRef<MutationRef>& mvector, Arena& nodeIDs_arena, VectorRef<UID>& nodeIDs) {
|
2019-05-10 11:55:44 +08:00
|
|
|
// mvector[i] should be mapped to nodeID[i]
|
|
|
|
ASSERT(mvector.empty());
|
|
|
|
ASSERT(nodeIDs.empty());
|
|
|
|
// key range [m->param1, m->param2)
|
2019-08-02 08:00:13 +08:00
|
|
|
std::map<Standalone<KeyRef>, UID>::iterator itlow, itup; // we will return [itlow, itup)
|
2019-09-04 06:50:21 +08:00
|
|
|
itlow = self->rangeToApplier.lower_bound(m.param1); // lower_bound returns the iterator that is >= m.param1
|
2019-08-02 08:00:13 +08:00
|
|
|
if (itlow->first > m.param1) {
|
2019-09-04 06:50:21 +08:00
|
|
|
if (itlow != self->rangeToApplier.begin()) {
|
2019-05-15 08:00:58 +08:00
|
|
|
--itlow;
|
2019-05-10 11:55:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-09-04 06:50:21 +08:00
|
|
|
itup = self->rangeToApplier.upper_bound(m.param2); // return rmap::end if no key is after m.param2.
|
|
|
|
ASSERT(itup == self->rangeToApplier.end() || itup->first > m.param2);
|
2019-05-14 08:24:57 +08:00
|
|
|
|
2019-05-15 08:39:44 +08:00
|
|
|
std::map<Standalone<KeyRef>, UID>::iterator itApplier;
|
2019-05-14 08:24:57 +08:00
|
|
|
while (itlow != itup) {
|
2019-08-02 08:00:13 +08:00
|
|
|
Standalone<MutationRef> curm; // current mutation
|
2019-05-10 11:55:44 +08:00
|
|
|
curm.type = m.type;
|
2019-08-02 08:00:13 +08:00
|
|
|
// The first split mutation should starts with m.first.
|
2019-09-04 06:50:21 +08:00
|
|
|
// The later ones should start with the rangeToApplier boundary.
|
2019-08-02 08:00:13 +08:00
|
|
|
if (m.param1 > itlow->first) {
|
2019-05-15 08:00:58 +08:00
|
|
|
curm.param1 = m.param1;
|
|
|
|
} else {
|
|
|
|
curm.param1 = itlow->first;
|
|
|
|
}
|
2019-05-15 08:39:44 +08:00
|
|
|
itApplier = itlow;
|
2019-05-10 11:55:44 +08:00
|
|
|
itlow++;
|
2019-05-14 08:24:57 +08:00
|
|
|
if (itlow == itup) {
|
2019-08-02 08:00:13 +08:00
|
|
|
ASSERT(m.param2 <= normalKeys.end);
|
2019-05-14 08:24:57 +08:00
|
|
|
curm.param2 = m.param2;
|
2019-08-02 08:00:13 +08:00
|
|
|
} else if (m.param2 < itlow->first) {
|
2019-06-01 02:09:31 +08:00
|
|
|
UNREACHABLE();
|
2019-05-14 08:24:57 +08:00
|
|
|
curm.param2 = m.param2;
|
2019-05-10 11:55:44 +08:00
|
|
|
} else {
|
|
|
|
curm.param2 = itlow->first;
|
|
|
|
}
|
2019-08-02 08:00:13 +08:00
|
|
|
ASSERT(curm.param1 <= curm.param2);
|
2019-05-14 08:24:57 +08:00
|
|
|
mvector.push_back_deep(mvector_arena, curm);
|
2019-05-15 08:39:44 +08:00
|
|
|
nodeIDs.push_back(nodeIDs_arena, itApplier->second);
|
2019-05-10 11:55:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
// key_input format:
|
|
|
|
// [logRangeMutation.first][hash_value_of_commit_version:1B][bigEndian64(commitVersion)][bigEndian32(part)]
|
2019-05-31 02:18:24 +08:00
|
|
|
// value_input: serialized binary of mutations at the same version
|
2019-08-02 08:00:13 +08:00
|
|
|
bool concatenateBackupMutationForLogFile(std::map<Standalone<StringRef>, Standalone<StringRef>>* pMutationMap,
|
|
|
|
std::map<Standalone<StringRef>, uint32_t>* pMutationPartMap,
|
2019-12-20 08:50:39 +08:00
|
|
|
Standalone<StringRef> key_input, Standalone<StringRef> val_input,
|
2019-12-23 09:16:40 +08:00
|
|
|
const RestoreAsset& asset) {
|
2019-08-02 08:00:13 +08:00
|
|
|
SerializedMutationListMap& mutationMap = *pMutationMap;
|
|
|
|
std::map<Standalone<StringRef>, uint32_t>& mutationPartMap = *pMutationPartMap;
|
2019-11-23 05:12:04 +08:00
|
|
|
const int key_prefix_len = sizeof(uint8_t) + sizeof(Version) + sizeof(uint32_t);
|
2019-05-31 02:18:24 +08:00
|
|
|
|
2019-12-20 03:49:37 +08:00
|
|
|
BackupStringRefReader readerKey(key_input, restore_corrupted_data()); // read key_input!
|
2019-11-23 05:12:04 +08:00
|
|
|
int logRangeMutationFirstLength = key_input.size() - key_prefix_len;
|
2019-05-10 11:55:44 +08:00
|
|
|
bool concatenated = false;
|
|
|
|
|
2019-11-23 05:12:04 +08:00
|
|
|
ASSERT_WE_THINK(key_input.size() >= key_prefix_len);
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
if (logRangeMutationFirstLength > 0) {
|
|
|
|
// Strip out the [logRangeMutation.first]; otherwise, the following readerKey.consume will produce wrong value
|
|
|
|
readerKey.consume(logRangeMutationFirstLength);
|
2019-05-10 11:55:44 +08:00
|
|
|
}
|
|
|
|
|
2019-08-02 09:20:42 +08:00
|
|
|
readerKey.consume<uint8_t>(); // uint8_t hashValue = readerKey.consume<uint8_t>()
|
2019-11-23 03:47:25 +08:00
|
|
|
Version commitVersion = readerKey.consumeNetworkUInt64();
|
2019-12-20 08:50:39 +08:00
|
|
|
// Skip mutations not in [asset.beginVersion, asset.endVersion), which is what we are only processing right now
|
|
|
|
if (commitVersion < asset.beginVersion || commitVersion >= asset.endVersion) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
uint32_t part = readerKey.consumeNetworkUInt32();
|
|
|
|
// Use commitVersion as id
|
2019-11-23 03:47:25 +08:00
|
|
|
Standalone<StringRef> id = StringRef((uint8_t*)&commitVersion, sizeof(Version));
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
if (mutationMap.find(id) == mutationMap.end()) {
|
2019-05-28 09:39:30 +08:00
|
|
|
mutationMap.insert(std::make_pair(id, val_input));
|
2019-08-02 08:00:13 +08:00
|
|
|
if (part != 0) {
|
2019-09-04 06:50:21 +08:00
|
|
|
TraceEvent(SevError, "FastRestore").detail("FirstPartNotZero", part).detail("KeyInput", getHexString(key_input));
|
2019-05-10 11:55:44 +08:00
|
|
|
}
|
2019-05-31 02:18:24 +08:00
|
|
|
mutationPartMap.insert(std::make_pair(id, part));
|
2019-09-04 06:50:21 +08:00
|
|
|
} else { // Concatenate the val string with the same commitVersion
|
2019-08-02 08:00:13 +08:00
|
|
|
mutationMap[id] =
|
|
|
|
mutationMap[id].contents().withSuffix(val_input.contents()); // Assign the new Areana to the map's value
|
|
|
|
if (part != (mutationPartMap[id] + 1)) {
|
2019-05-31 02:18:24 +08:00
|
|
|
// Check if the same range or log file has been processed more than once!
|
2019-09-04 06:50:21 +08:00
|
|
|
TraceEvent(SevError, "FastRestore")
|
|
|
|
.detail("CurrentPart1", mutationPartMap[id])
|
|
|
|
.detail("CurrentPart2", part)
|
|
|
|
.detail("KeyInput", getHexString(key_input))
|
|
|
|
.detail("Hint", "Check if the same range or log file has been processed more than once");
|
2019-05-10 11:55:44 +08:00
|
|
|
}
|
2019-05-31 02:18:24 +08:00
|
|
|
mutationPartMap[id] = part;
|
2019-05-10 11:55:44 +08:00
|
|
|
concatenated = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return concatenated;
|
|
|
|
}
|
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
// Parse the kv pair (version, serialized_mutation), which are the results parsed from log file, into
|
|
|
|
// (version, <K, V, mutationType>) pair;
|
|
|
|
// Put the parsed versioned mutations into *pkvOps.
|
|
|
|
//
|
|
|
|
// Input key: [commitVersion_of_the_mutation_batch:uint64_t];
|
|
|
|
// Input value: [includeVersion:uint64_t][val_length:uint32_t][encoded_list_of_mutations], where
|
|
|
|
// includeVersion is the serialized version in the batch commit. It is not the commitVersion in Input key.
|
|
|
|
//
|
|
|
|
// val_length is always equal to (val.size() - 12); otherwise,
|
|
|
|
// we may not get the entire mutation list for the version. encoded_list_of_mutations is:
|
|
|
|
// [mutation1][mutation2]...[mutationk], where
|
|
|
|
// a mutation is encoded as [type:uint32_t][keyLength:uint32_t][valueLength:uint32_t][keyContent][valueContent]
|
2019-11-21 13:04:18 +08:00
|
|
|
void _parseSerializedMutation(std::map<LoadingParam, VersionedMutationsMap>::iterator kvOpsIter,
|
2019-12-03 06:33:31 +08:00
|
|
|
SerializedMutationListMap* pmutationMap,
|
2019-12-23 09:16:40 +08:00
|
|
|
std::map<LoadingParam, MutationsVec>::iterator samplesIter, const RestoreAsset& asset) {
|
2019-11-14 02:57:21 +08:00
|
|
|
VersionedMutationsMap& kvOps = kvOpsIter->second;
|
2019-12-05 03:22:44 +08:00
|
|
|
MutationsVec& samples = samplesIter->second;
|
2019-08-02 08:00:13 +08:00
|
|
|
SerializedMutationListMap& mutationMap = *pmutationMap;
|
|
|
|
|
|
|
|
for (auto& m : mutationMap) {
|
2019-05-10 11:55:44 +08:00
|
|
|
StringRef k = m.first.contents();
|
|
|
|
StringRef val = m.second.contents();
|
|
|
|
|
2019-12-20 03:49:37 +08:00
|
|
|
BackupStringRefReader kReader(k, restore_corrupted_data());
|
2019-05-31 02:18:24 +08:00
|
|
|
uint64_t commitVersion = kReader.consume<uint64_t>(); // Consume little Endian data
|
2019-12-20 08:50:39 +08:00
|
|
|
// We have already filter the commit not in [beginVersion, endVersion) when we concatenate kv pair in log file
|
2019-12-23 09:16:40 +08:00
|
|
|
ASSERT_WE_THINK(asset.isInVersionRange(commitVersion));
|
2019-12-07 14:00:40 +08:00
|
|
|
kvOps.insert(std::make_pair(commitVersion, MutationsVec()));
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-12-20 03:49:37 +08:00
|
|
|
BackupStringRefReader vReader(val, restore_corrupted_data());
|
2019-05-31 02:18:24 +08:00
|
|
|
vReader.consume<uint64_t>(); // Consume the includeVersion
|
2019-11-23 03:47:25 +08:00
|
|
|
// TODO(xumengpanda): verify the protocol version is compatible and raise error if needed
|
|
|
|
|
|
|
|
// Parse little endian value, confirmed it is correct!
|
|
|
|
uint32_t val_length_decoded = vReader.consume<uint32_t>();
|
|
|
|
ASSERT(val_length_decoded == val.size() - sizeof(uint64_t) - sizeof(uint32_t));
|
2019-05-10 11:55:44 +08:00
|
|
|
|
|
|
|
while (1) {
|
|
|
|
// stop when reach the end of the string
|
2019-08-02 08:00:13 +08:00
|
|
|
if (vReader.eof()) { //|| *reader.rptr == 0xFF
|
2019-05-10 11:55:44 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2019-05-31 02:18:24 +08:00
|
|
|
uint32_t type = vReader.consume<uint32_t>();
|
|
|
|
uint32_t kLen = vReader.consume<uint32_t>();
|
|
|
|
uint32_t vLen = vReader.consume<uint32_t>();
|
2019-08-02 08:00:13 +08:00
|
|
|
const uint8_t* k = vReader.consume(kLen);
|
|
|
|
const uint8_t* v = vReader.consume(vLen);
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
MutationRef mutation((MutationRef::Type)type, KeyRef(k, kLen), KeyRef(v, vLen));
|
2019-12-23 09:16:40 +08:00
|
|
|
// Should this mutation be skipped?
|
|
|
|
if (mutation.param1 >= asset.range.end ||
|
|
|
|
(isRangeMutation(mutation) && mutation.param2 < asset.range.begin) ||
|
|
|
|
(!isRangeMutation(mutation) && mutation.param1 < asset.range.begin)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// Only apply mutation within the asset.range
|
2019-12-20 08:50:39 +08:00
|
|
|
if (isRangeMutation(mutation)) {
|
|
|
|
mutation.param1 = mutation.param1 >= asset.range.begin ? mutation.param1 : asset.range.begin;
|
|
|
|
mutation.param2 = mutation.param2 < asset.range.end ? mutation.param2 : asset.range.end;
|
|
|
|
}
|
2019-12-23 09:16:40 +08:00
|
|
|
|
2019-11-05 08:10:08 +08:00
|
|
|
TraceEvent(SevFRMutationInfo, "FastRestore_VerboseDebug")
|
|
|
|
.detail("CommitVersion", commitVersion)
|
|
|
|
.detail("ParsedMutation", mutation.toString());
|
2019-05-28 09:39:30 +08:00
|
|
|
kvOps[commitVersion].push_back_deep(kvOps[commitVersion].arena(), mutation);
|
2019-12-05 12:39:58 +08:00
|
|
|
// Sampling (FASTRESTORE_SAMPLING_PERCENT%) data
|
|
|
|
if (deterministicRandom()->random01() * 100 < SERVER_KNOBS->FASTRESTORE_SAMPLING_PERCENT) {
|
2019-12-03 06:33:31 +08:00
|
|
|
samples.push_back_deep(samples.arena(), mutation);
|
|
|
|
}
|
2019-08-02 08:00:13 +08:00
|
|
|
ASSERT_WE_THINK(kLen >= 0 && kLen < val.size());
|
|
|
|
ASSERT_WE_THINK(vLen >= 0 && vLen < val.size());
|
2019-05-10 11:55:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-31 02:18:24 +08:00
|
|
|
// Parsing the data blocks in a range file
|
2019-11-21 13:04:18 +08:00
|
|
|
ACTOR static Future<Void> _parseRangeFileToMutationsOnLoader(
|
2019-12-03 06:33:31 +08:00
|
|
|
std::map<LoadingParam, VersionedMutationsMap>::iterator kvOpsIter,
|
2019-12-05 03:22:44 +08:00
|
|
|
std::map<LoadingParam, MutationsVec>::iterator samplesIter, Reference<IBackupContainer> bc, Version version,
|
2019-12-20 08:50:39 +08:00
|
|
|
RestoreAsset asset) {
|
2019-11-14 02:57:21 +08:00
|
|
|
state VersionedMutationsMap& kvOps = kvOpsIter->second;
|
2019-12-05 03:22:44 +08:00
|
|
|
state MutationsVec& sampleMutations = samplesIter->second;
|
2019-08-02 08:00:13 +08:00
|
|
|
|
2019-12-20 08:50:39 +08:00
|
|
|
TraceEvent("FastRestoreDecodedRangeFile")
|
|
|
|
.detail("Filename", asset.filename)
|
|
|
|
.detail("Version", version)
|
2019-12-21 14:00:36 +08:00
|
|
|
.detail("BeginVersion", asset.beginVersion)
|
|
|
|
.detail("EndVersion", asset.endVersion);
|
2019-12-21 14:24:32 +08:00
|
|
|
// Sanity check the range file is within the restored version range
|
2019-12-20 08:50:39 +08:00
|
|
|
ASSERT_WE_THINK(version >= asset.beginVersion && version < asset.endVersion);
|
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
// The set of key value version is rangeFile.version. the key-value set in the same range file has the same version
|
2019-12-20 08:50:39 +08:00
|
|
|
Reference<IAsyncFile> inFile = wait(bc->readFile(asset.filename));
|
2019-11-26 14:31:53 +08:00
|
|
|
Standalone<VectorRef<KeyValueRef>> blockData =
|
2019-12-20 08:50:39 +08:00
|
|
|
wait(parallelFileRestore::decodeRangeFileBlock(inFile, asset.offset, asset.len));
|
|
|
|
TraceEvent("FastRestore")
|
|
|
|
.detail("DecodedRangeFile", asset.filename)
|
|
|
|
.detail("DataSize", blockData.contents().size());
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
// First and last key are the range for this file
|
2019-11-26 14:31:53 +08:00
|
|
|
KeyRange fileRange = KeyRangeRef(blockData.front().key, blockData.back().key);
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
// If fileRange doesn't intersect restore range then we're done.
|
2019-12-20 08:50:39 +08:00
|
|
|
if (!fileRange.intersects(asset.range)) {
|
2019-08-02 08:00:13 +08:00
|
|
|
return Void();
|
|
|
|
}
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
// We know the file range intersects the restore range but there could still be keys outside the restore range.
|
|
|
|
// Find the subvector of kv pairs that intersect the restore range.
|
|
|
|
// Note that the first and last keys are just the range endpoints for this file.
|
|
|
|
// They are metadata, not the real data.
|
|
|
|
int rangeStart = 1;
|
|
|
|
int rangeEnd = blockData.size() - 1; // The rangeStart and rangeEnd is [,)
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
// Slide start from begining, stop if something in range is found
|
2019-05-10 11:55:44 +08:00
|
|
|
// Move rangeStart and rangeEnd until they is within restoreRange
|
2019-12-20 08:50:39 +08:00
|
|
|
while (rangeStart < rangeEnd && !asset.range.contains(blockData[rangeStart].key)) {
|
2019-05-10 11:55:44 +08:00
|
|
|
++rangeStart;
|
2019-05-31 02:18:24 +08:00
|
|
|
}
|
2019-08-02 08:00:13 +08:00
|
|
|
// Side end from back, stop if something at (rangeEnd-1) is found in range
|
2019-12-20 08:50:39 +08:00
|
|
|
while (rangeEnd > rangeStart && !asset.range.contains(blockData[rangeEnd - 1].key)) {
|
2019-05-10 11:55:44 +08:00
|
|
|
--rangeEnd;
|
2019-05-31 02:18:24 +08:00
|
|
|
}
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
// Now data only contains the kv mutation within restoreRange
|
2019-11-26 14:31:53 +08:00
|
|
|
VectorRef<KeyValueRef> data = blockData.slice(rangeStart, rangeEnd);
|
|
|
|
int start = 0;
|
|
|
|
int end = data.size();
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-05-31 02:18:24 +08:00
|
|
|
// Convert KV in data into mutations in kvOps
|
2019-08-02 08:00:13 +08:00
|
|
|
for (int i = start; i < end; ++i) {
|
|
|
|
// NOTE: The KV pairs in range files are the real KV pairs in original DB.
|
2019-12-11 08:29:08 +08:00
|
|
|
// Should NOT add prefix or remove surfix for the backup data!
|
2019-08-02 08:00:13 +08:00
|
|
|
MutationRef m(MutationRef::Type::SetValue, data[i].key,
|
|
|
|
data[i].value); // ASSUME: all operation in range file is set.
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-05-31 02:18:24 +08:00
|
|
|
// We cache all kv operations into kvOps, and apply all kv operations later in one place
|
2019-12-07 14:00:40 +08:00
|
|
|
kvOps.insert(std::make_pair(version, MutationsVec()));
|
2019-11-05 08:10:08 +08:00
|
|
|
TraceEvent(SevFRMutationInfo, "FastRestore_VerboseDebug")
|
|
|
|
.detail("CommitVersion", version)
|
|
|
|
.detail("ParsedMutationKV", m.toString());
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-05-31 02:18:24 +08:00
|
|
|
ASSERT_WE_THINK(kvOps.find(version) != kvOps.end());
|
|
|
|
kvOps[version].push_back_deep(kvOps[version].arena(), m);
|
2019-12-05 12:39:58 +08:00
|
|
|
// Sampling (FASTRESTORE_SAMPLING_PERCENT%) data
|
|
|
|
if (deterministicRandom()->random01() * 100 < SERVER_KNOBS->FASTRESTORE_SAMPLING_PERCENT) {
|
2019-12-03 06:33:31 +08:00
|
|
|
sampleMutations.push_back_deep(sampleMutations.arena(), m);
|
|
|
|
}
|
2019-05-31 02:18:24 +08:00
|
|
|
}
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-05-31 02:18:24 +08:00
|
|
|
return Void();
|
2019-08-02 08:00:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Parse data blocks in a log file into a vector of <string, string> pairs. Each pair.second contains the mutations at a
|
|
|
|
// version encoded in pair.first Step 1: decodeLogFileBlock into <string, string> pairs Step 2: Concatenate the
|
|
|
|
// pair.second of pairs with the same pair.first.
|
2019-10-18 01:12:15 +08:00
|
|
|
ACTOR static Future<Void> _parseLogFileToMutationsOnLoader(NotifiedVersion* pProcessedFileOffset,
|
|
|
|
SerializedMutationListMap* pMutationMap,
|
|
|
|
SerializedMutationPartMap* pMutationPartMap,
|
2019-12-20 08:50:39 +08:00
|
|
|
Reference<IBackupContainer> bc, RestoreAsset asset) {
|
|
|
|
Reference<IAsyncFile> inFile = wait(bc->readFile(asset.filename));
|
2019-08-02 08:00:13 +08:00
|
|
|
// decodeLogFileBlock() must read block by block!
|
|
|
|
state Standalone<VectorRef<KeyValueRef>> data =
|
2019-12-20 08:50:39 +08:00
|
|
|
wait(parallelFileRestore::decodeLogFileBlock(inFile, asset.offset, asset.len));
|
2019-10-17 11:30:11 +08:00
|
|
|
TraceEvent("FastRestore")
|
2019-12-20 08:50:39 +08:00
|
|
|
.detail("DecodedLogFile", asset.filename)
|
|
|
|
.detail("Offset", asset.offset)
|
|
|
|
.detail("Length", asset.len)
|
2019-10-17 11:30:11 +08:00
|
|
|
.detail("DataSize", data.contents().size());
|
|
|
|
|
|
|
|
// Ensure data blocks in the same file are processed in order
|
2019-12-20 08:50:39 +08:00
|
|
|
wait(pProcessedFileOffset->whenAtLeast(asset.offset));
|
2019-10-17 11:30:11 +08:00
|
|
|
|
2019-12-20 08:50:39 +08:00
|
|
|
if (pProcessedFileOffset->get() == asset.offset) {
|
2019-11-26 14:31:53 +08:00
|
|
|
int start = 0;
|
|
|
|
int end = data.size();
|
|
|
|
int numConcatenated = 0;
|
2019-10-17 11:30:11 +08:00
|
|
|
for (int i = start; i < end; ++i) {
|
|
|
|
// Key k = data[i].key.withPrefix(mutationLogPrefix);
|
|
|
|
// ValueRef v = data[i].value;
|
|
|
|
// Concatenate the backuped param1 and param2 (KV) at the same version.
|
|
|
|
bool concatenated =
|
2019-12-20 08:50:39 +08:00
|
|
|
concatenateBackupMutationForLogFile(pMutationMap, pMutationPartMap, data[i].key, data[i].value, asset);
|
2019-10-17 11:30:11 +08:00
|
|
|
numConcatenated += (concatenated ? 1 : 0);
|
|
|
|
}
|
2019-12-20 08:50:39 +08:00
|
|
|
pProcessedFileOffset->set(asset.offset + asset.len);
|
2019-05-31 02:18:24 +08:00
|
|
|
}
|
2019-05-10 11:55:44 +08:00
|
|
|
|
|
|
|
return Void();
|
2019-08-02 08:00:13 +08:00
|
|
|
}
|