2019-05-07 07:56:49 +08:00
|
|
|
/*
|
|
|
|
* RestoreApplier.actor.cpp
|
|
|
|
*
|
|
|
|
* This source file is part of the FoundationDB open source project
|
|
|
|
*
|
|
|
|
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-05-13 12:53:09 +08:00
|
|
|
// This file defines the functions used by the RestoreApplier role.
|
|
|
|
// RestoreApplier role starts at restoreApplierCore actor
|
2019-05-10 11:55:44 +08:00
|
|
|
|
|
|
|
#include "fdbclient/NativeAPI.actor.h"
|
|
|
|
#include "fdbclient/SystemData.h"
|
|
|
|
#include "fdbclient/BackupAgent.actor.h"
|
|
|
|
#include "fdbclient/ManagementAPI.actor.h"
|
|
|
|
#include "fdbclient/MutationList.h"
|
|
|
|
#include "fdbclient/BackupContainer.h"
|
|
|
|
#include "fdbserver/RestoreCommon.actor.h"
|
|
|
|
#include "fdbserver/RestoreUtil.h"
|
|
|
|
#include "fdbserver/RestoreRoleCommon.actor.h"
|
|
|
|
#include "fdbserver/RestoreApplier.actor.h"
|
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-12-11 09:22:51 +08:00
|
|
|
// Forward declarations of the request handlers that restoreApplierCore dispatches to;
// both are defined further down in this file.
ACTOR static Future<Void> handleSendMutationVectorRequest(
    RestoreSendVersionedMutationsRequest req,
    Reference<RestoreApplierData> self);

ACTOR static Future<Void> handleApplyToDBRequest(
    RestoreVersionBatchRequest req,
    Reference<RestoreApplierData> self,
    Database cx);
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-07-25 07:59:05 +08:00
|
|
|
// Main loop of the RestoreApplier role.
//
// Creates the role's RestoreApplierData from applierInterf.id() and nodeIndex, then repeatedly waits for
// the next request on any of the interface's request streams and dispatches it:
//   - heartbeat, sendMutationVector, applyToDB and initVersionBatch requests are passed to their handler
//     actors, whose futures are added to the `actors` collection;
//   - finishRestore is passed to handleFinishRestoreRequest() directly; if req.terminate is set, exitRole
//     becomes ready and the loop leaves through the exitRole branch on a later iteration.
// An Error thrown inside the choose block is traced at SevWarn (together with the type of the request
// that was being dispatched) and also ends the loop.
//
// applierInterf: request streams served by this applier.
// nodeIndex:     forwarded to the RestoreApplierData constructor.
// cx:            database handle forwarded to handleApplyToDBRequest.
// Returns Void() once the loop has exited.
ACTOR Future<Void> restoreApplierCore(RestoreApplierInterface applierInterf, int nodeIndex, Database cx) {
	state Reference<RestoreApplierData> self =
	    Reference<RestoreApplierData>(new RestoreApplierData(applierInterf.id(), nodeIndex));
	state ActorCollection actors(false);
	state Future<Void> exitRole = Never();

	loop {
		// Type of the request currently being dispatched; only used by the error trace below.
		state std::string requestTypeStr = "[Init]";

		try {
			choose {
				when(RestoreSimpleRequest req = waitNext(applierInterf.heartbeat.getFuture())) {
					requestTypeStr = "heartbeat";
					actors.add(handleHeartbeat(req, applierInterf.id()));
				}
				when(RestoreSendVersionedMutationsRequest req =
				         waitNext(applierInterf.sendMutationVector.getFuture())) {
					requestTypeStr = "sendMutationVector";
					actors.add(handleSendMutationVectorRequest(req, self));
				}
				when(RestoreVersionBatchRequest req = waitNext(applierInterf.applyToDB.getFuture())) {
					requestTypeStr = "applyToDB";
					actors.add(handleApplyToDBRequest(req, self, cx));
				}
				when(RestoreVersionBatchRequest req = waitNext(applierInterf.initVersionBatch.getFuture())) {
					requestTypeStr = "initVersionBatch";
					actors.add(handleInitVersionBatchRequest(req, self));
				}
				when(RestoreFinishRequest req = waitNext(applierInterf.finishRestore.getFuture())) {
					requestTypeStr = "finishRestore";
					handleFinishRestoreRequest(req, self);
					if (req.terminate) {
						// Make exitRole ready so that the branch below fires and ends the role.
						exitRole = Void();
					}
				}
				when(wait(exitRole)) {
					TraceEvent("FastRestore").detail("RestoreApplierCore", "ExitRole").detail("NodeID", self->id());
					break;
				}
			}
		} catch (Error& e) {
			// This is the applier role: report the failure under an applier-specific key rather than the
			// loader's "RestoreLoaderError", so the two roles can be told apart in the trace logs.
			TraceEvent(SevWarn, "FastRestore")
			    .detail("RestoreApplierError", e.what())
			    .detail("RequestType", requestTypeStr);
			break;
		}
	}

	return Void();
}
|
|
|
|
|
2019-05-31 02:18:24 +08:00
|
|
|
// The actor may be invovked multiple times and executed async.
|
2019-10-18 06:20:03 +08:00
|
|
|
// No race condition as long as we do not wait or yield when operate the shared data.
|
|
|
|
// Multiple such actors can run on different fileIDs, because mutations in different files belong to different versions;
|
|
|
|
// Only one actor can process mutations from the same file
|
2019-12-11 09:22:51 +08:00
|
|
|
ACTOR static Future<Void> handleSendMutationVectorRequest(RestoreSendVersionedMutationsRequest req,
|
2019-08-02 08:00:13 +08:00
|
|
|
Reference<RestoreApplierData> self) {
|
2020-01-15 06:18:41 +08:00
|
|
|
state Reference<ApplierBatchData> batchData = self->batch[req.batchIndex];
|
|
|
|
// Assume: processedFileState[req.asset] will not be erased while the actor is active.
|
2019-10-19 12:50:12 +08:00
|
|
|
// Note: Insert new items into processedFileState will not invalidate the reference.
|
2020-01-15 06:18:41 +08:00
|
|
|
state NotifiedVersion& curFilePos = batchData->processedFileState[req.asset];
|
2019-05-23 04:30:33 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
TraceEvent("FastRestore")
|
|
|
|
.detail("ApplierNode", self->id())
|
2020-01-15 06:18:41 +08:00
|
|
|
.detail("VersionBatchIndex", req.batchIndex)
|
2019-12-20 08:50:39 +08:00
|
|
|
.detail("RestoreAsset", req.asset.toString())
|
2019-10-19 08:21:39 +08:00
|
|
|
.detail("ProcessedFileVersion", curFilePos.get())
|
2019-08-02 08:00:13 +08:00
|
|
|
.detail("Request", req.toString());
|
2019-05-23 04:30:33 +08:00
|
|
|
|
2019-10-19 08:21:39 +08:00
|
|
|
wait(curFilePos.whenAtLeast(req.prevVersion));
|
2019-05-23 04:30:33 +08:00
|
|
|
|
2019-10-19 08:21:39 +08:00
|
|
|
if (curFilePos.get() == req.prevVersion) {
|
2019-12-05 09:11:40 +08:00
|
|
|
Version commitVersion = req.version;
|
2019-12-11 14:55:40 +08:00
|
|
|
MutationsVec mutations(req.mutations);
|
2019-12-21 11:25:33 +08:00
|
|
|
// Sanity check: mutations in range file is in [beginVersion, endVersion);
|
|
|
|
// mutations in log file is in [beginVersion, endVersion], both inclusive.
|
|
|
|
ASSERT_WE_THINK(commitVersion >= req.asset.beginVersion);
|
2020-01-07 12:32:26 +08:00
|
|
|
// Loader sends the endVersion to ensure all useful versions are sent
|
|
|
|
ASSERT_WE_THINK((req.isRangeFile && commitVersion <= req.asset.endVersion) ||
|
2019-12-21 14:00:36 +08:00
|
|
|
(!req.isRangeFile && commitVersion <= req.asset.endVersion));
|
2019-12-20 08:50:39 +08:00
|
|
|
|
2020-01-15 06:18:41 +08:00
|
|
|
if (batchData->kvOps.find(commitVersion) == batchData->kvOps.end()) {
|
|
|
|
batchData->kvOps.insert(std::make_pair(commitVersion, MutationsVec()));
|
2019-05-23 04:30:33 +08:00
|
|
|
}
|
2019-12-05 12:39:58 +08:00
|
|
|
for (int mIndex = 0; mIndex < mutations.size(); mIndex++) {
|
2019-05-23 04:30:33 +08:00
|
|
|
MutationRef mutation = mutations[mIndex];
|
2019-12-11 14:55:40 +08:00
|
|
|
TraceEvent(SevFRMutationInfo, "FastRestore")
|
2019-10-23 14:24:20 +08:00
|
|
|
.detail("ApplierNode", self->id())
|
2019-12-20 08:50:39 +08:00
|
|
|
.detail("RestoreAsset", req.asset.toString())
|
2019-10-23 14:24:20 +08:00
|
|
|
.detail("Version", commitVersion)
|
|
|
|
.detail("Index", mIndex)
|
|
|
|
.detail("MutationReceived", mutation.toString());
|
2019-12-20 08:50:39 +08:00
|
|
|
// Sanity check
|
|
|
|
if (g_network->isSimulated()) {
|
|
|
|
if (isRangeMutation(mutation)) {
|
|
|
|
ASSERT(mutation.param1 >= req.asset.range.begin &&
|
|
|
|
mutation.param2 <= req.asset.range.end); // Range mutation's right side is exclusive
|
|
|
|
} else {
|
|
|
|
ASSERT(mutation.param1 >= req.asset.range.begin && mutation.param1 < req.asset.range.end);
|
|
|
|
}
|
|
|
|
}
|
2020-01-15 06:18:41 +08:00
|
|
|
batchData->kvOps[commitVersion].push_back_deep(batchData->kvOps[commitVersion].arena(), mutation);
|
2019-11-05 03:47:29 +08:00
|
|
|
// TODO: What if log file's mutations are delivered out-of-order (behind) the range file's mutations?!
|
2019-05-23 04:30:33 +08:00
|
|
|
}
|
2019-10-19 08:21:39 +08:00
|
|
|
curFilePos.set(req.version);
|
2019-05-23 04:30:33 +08:00
|
|
|
}
|
|
|
|
|
2019-05-30 04:42:35 +08:00
|
|
|
req.reply.send(RestoreCommonReply(self->id()));
|
2019-05-23 04:30:33 +08:00
|
|
|
return Void();
|
|
|
|
}
|
|
|
|
|
2019-10-15 05:57:15 +08:00
|
|
|
// Progress and checkpoint for applying (atomic) mutations in transactions to DB
|
|
|
|
struct DBApplyProgress {
|
|
|
|
// Mutation state in the current uncommitted transaction
|
|
|
|
VersionedMutationsMap::iterator curItInCurTxn;
|
|
|
|
int curIndexInCurTxn;
|
|
|
|
|
|
|
|
// Save the starting point for current txn to handle (commit_unknown_result) error in txn commit
|
|
|
|
// startItInUncommittedTxn is starting iterator in the most recent uncommitted (and failed) txn
|
|
|
|
// startIndexInUncommittedTxn is start index in the most recent uncommitted (and failed) txn.
|
|
|
|
// Note: Txns have different number of mutations
|
|
|
|
VersionedMutationsMap::iterator startItInUncommittedTxn;
|
|
|
|
int startIndexInUncommittedTxn;
|
|
|
|
|
|
|
|
// State to decide if a txn succeeds or not when txn error (commit_unknown_result) happens;
|
|
|
|
// curTxnId: The id of the current uncommitted txn, which monotonically increase for each successful transaction
|
|
|
|
// uncommittedTxnId: The id of the most recent succeeded txn. Used to recover the failed txn id in retry
|
|
|
|
// lastTxnHasError: Does the last txn has error. TODO: Only need to handle txn_commit_unknown error
|
|
|
|
Version curTxnId;
|
|
|
|
Version uncommittedTxnId;
|
|
|
|
bool lastTxnHasError;
|
|
|
|
|
|
|
|
// Decide when to commit a transaction. We buffer enough mutations in a txn before commit the txn
|
|
|
|
bool startNextVersion; // The next txn will include mutations in next version
|
|
|
|
int numAtomicOps;
|
|
|
|
double transactionSize;
|
|
|
|
|
2020-01-15 06:18:41 +08:00
|
|
|
Reference<ApplierBatchData> batchData;
|
|
|
|
UID applierId;
|
2019-10-15 05:57:15 +08:00
|
|
|
|
|
|
|
DBApplyProgress() = default;
|
2020-01-15 06:18:41 +08:00
|
|
|
explicit DBApplyProgress(UID applierId, Reference<ApplierBatchData> batchData)
|
|
|
|
: applierId(applierId), batchData(batchData), curIndexInCurTxn(0), startIndexInUncommittedTxn(0), curTxnId(0),
|
|
|
|
uncommittedTxnId(0), lastTxnHasError(false), startNextVersion(false), numAtomicOps(0), transactionSize(0) {
|
|
|
|
curItInCurTxn = batchData->kvOps.begin();
|
|
|
|
while (curItInCurTxn != batchData->kvOps.end() && curItInCurTxn->second.empty()) {
|
2019-10-15 05:57:15 +08:00
|
|
|
curItInCurTxn++;
|
|
|
|
}
|
|
|
|
startItInUncommittedTxn = curItInCurTxn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Has all mutations been committed?
|
2020-01-15 06:18:41 +08:00
|
|
|
bool isDone() { return curItInCurTxn == batchData->kvOps.end(); }
|
2019-10-15 05:57:15 +08:00
|
|
|
|
|
|
|
// Set cursor for next mutation
|
|
|
|
void nextMutation() {
|
|
|
|
curIndexInCurTxn++;
|
2020-01-15 06:18:41 +08:00
|
|
|
while (curItInCurTxn != batchData->kvOps.end() && curIndexInCurTxn >= curItInCurTxn->second.size()) {
|
2019-10-15 05:57:15 +08:00
|
|
|
curIndexInCurTxn = 0;
|
|
|
|
curItInCurTxn++;
|
|
|
|
startNextVersion = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Setup for the next transaction; This should be done after nextMutation()
|
|
|
|
void nextTxn() {
|
|
|
|
transactionSize = 0;
|
|
|
|
numAtomicOps = 0;
|
|
|
|
lastTxnHasError = false;
|
|
|
|
startNextVersion = false;
|
|
|
|
|
|
|
|
curTxnId++;
|
|
|
|
|
|
|
|
startIndexInUncommittedTxn = curIndexInCurTxn;
|
|
|
|
startItInUncommittedTxn = curItInCurTxn;
|
|
|
|
uncommittedTxnId = curTxnId;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Rollback to the starting point of the uncommitted-and-failed transaction to
|
|
|
|
// re-execute uncommitted txn
|
|
|
|
void rollback() {
|
2019-10-15 07:18:54 +08:00
|
|
|
TraceEvent(SevWarn, "FastRestore_ApplyTxnError")
|
|
|
|
.detail("TxnStatusFailed", curTxnId)
|
2020-01-15 06:18:41 +08:00
|
|
|
.detail("ApplierApplyToDB", applierId)
|
2019-10-15 07:18:54 +08:00
|
|
|
.detail("UncommittedTxnId", uncommittedTxnId)
|
|
|
|
.detail("CurIteratorVersion", curItInCurTxn->first)
|
|
|
|
.detail("StartIteratorVersionInUncommittedTxn", startItInUncommittedTxn->first)
|
|
|
|
.detail("CurrentIndexInFailedTxn", curIndexInCurTxn)
|
|
|
|
.detail("StartIndexInUncommittedTxn", startIndexInUncommittedTxn)
|
|
|
|
.detail("NumIncludedAtomicOps", numAtomicOps);
|
2019-10-15 05:57:15 +08:00
|
|
|
curItInCurTxn = startItInUncommittedTxn;
|
|
|
|
curIndexInCurTxn = startIndexInUncommittedTxn;
|
|
|
|
curTxnId = uncommittedTxnId;
|
|
|
|
|
|
|
|
numAtomicOps = 0;
|
|
|
|
transactionSize = 0;
|
|
|
|
startNextVersion = false;
|
|
|
|
lastTxnHasError = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool shouldCommit() {
|
2019-10-24 11:49:14 +08:00
|
|
|
return (!lastTxnHasError && (startNextVersion || transactionSize >= opConfig.transactionBatchSizeThreshold ||
|
2020-01-15 06:18:41 +08:00
|
|
|
curItInCurTxn == batchData->kvOps.end()));
|
2019-10-15 05:57:15 +08:00
|
|
|
}
|
2019-10-15 07:18:54 +08:00
|
|
|
|
|
|
|
bool hasError() { return lastTxnHasError; }
|
|
|
|
|
|
|
|
void setTxnError(Error& e) {
|
|
|
|
TraceEvent(SevWarnAlways, "FastRestore_ApplyTxnError")
|
|
|
|
.detail("TxnStatus", "?")
|
2020-01-15 06:18:41 +08:00
|
|
|
.detail("ApplierApplyToDB", applierId)
|
2019-10-15 07:18:54 +08:00
|
|
|
.detail("TxnId", curTxnId)
|
|
|
|
.detail("StartIndexInCurrentTxn", curIndexInCurTxn)
|
|
|
|
.detail("Version", curItInCurTxn->first)
|
|
|
|
.error(e, true);
|
|
|
|
lastTxnHasError = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
MutationRef getCurrentMutation() {
|
|
|
|
ASSERT_WE_THINK(curIndexInCurTxn < curItInCurTxn->second.size());
|
|
|
|
return curItInCurTxn->second[curIndexInCurTxn];
|
|
|
|
}
|
2019-10-15 05:57:15 +08:00
|
|
|
};
|
|
|
|
|
2020-01-15 06:18:41 +08:00
|
|
|
ACTOR Future<Void> applyToDB(UID applierID, int64_t batchIndex, Reference<ApplierBatchData> batchData, Database cx) {
|
2019-12-05 03:22:44 +08:00
|
|
|
// state variables must be defined at the start of actor to be initialized in the actor constructor
|
2019-12-04 13:27:06 +08:00
|
|
|
state std::string typeStr = "";
|
2019-11-26 13:13:27 +08:00
|
|
|
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
|
2020-01-15 06:18:41 +08:00
|
|
|
state DBApplyProgress progress(applierID, batchData);
|
2019-05-10 11:55:44 +08:00
|
|
|
|
|
|
|
// Assume the process will not crash when it apply mutations to DB. The reply message can be lost though
|
2020-01-15 06:18:41 +08:00
|
|
|
if (batchData->kvOps.empty()) {
|
2019-10-11 08:36:38 +08:00
|
|
|
TraceEvent("FastRestore_ApplierTxn")
|
2020-01-15 06:18:41 +08:00
|
|
|
.detail("ApplierApplyToDBFinished", applierID)
|
2019-10-11 08:36:38 +08:00
|
|
|
.detail("Reason", "EmptyVersionMutation");
|
2019-05-10 11:55:44 +08:00
|
|
|
return Void();
|
|
|
|
}
|
2020-01-15 06:18:41 +08:00
|
|
|
ASSERT_WE_THINK(batchData->kvOps.size());
|
2019-08-02 08:00:13 +08:00
|
|
|
TraceEvent("FastRestore")
|
2020-01-15 06:18:41 +08:00
|
|
|
.detail("ApplierApplyToDB", applierID)
|
|
|
|
.detail("FromVersion", batchData->kvOps.begin()->first)
|
|
|
|
.detail("EndVersion", batchData->kvOps.rbegin()->first);
|
2019-08-02 08:00:13 +08:00
|
|
|
|
2020-01-15 06:18:41 +08:00
|
|
|
batchData->sanityCheckMutationOps();
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-10-15 05:57:15 +08:00
|
|
|
if (progress.isDone()) {
|
2019-10-11 08:36:38 +08:00
|
|
|
TraceEvent("FastRestore_ApplierTxn")
|
2020-01-15 06:18:41 +08:00
|
|
|
.detail("ApplierApplyToDBFinished", applierID)
|
2019-10-11 08:36:38 +08:00
|
|
|
.detail("Reason", "NoMutationAtVersions");
|
2019-10-11 08:24:03 +08:00
|
|
|
return Void();
|
|
|
|
}
|
|
|
|
|
2019-11-04 09:16:21 +08:00
|
|
|
// Sanity check the restoreApplierKeys, which should be empty at this point
|
|
|
|
loop {
|
|
|
|
try {
|
|
|
|
tr->reset();
|
|
|
|
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
|
|
|
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
2020-01-15 06:18:41 +08:00
|
|
|
Key begin = restoreApplierKeyFor(applierID, batchIndex, 0);
|
|
|
|
Key end = restoreApplierKeyFor(applierID, batchIndex, std::numeric_limits<int64_t>::max());
|
2019-11-04 09:16:21 +08:00
|
|
|
Standalone<RangeResultRef> txnIds = wait(tr->getRange(KeyRangeRef(begin, end), CLIENT_KNOBS->TOO_MANY));
|
|
|
|
if (txnIds.size() > 0) {
|
|
|
|
TraceEvent(SevError, "FastRestore_ApplyTxnStateNotClean").detail("TxnIds", txnIds.size());
|
|
|
|
for (auto& kv : txnIds) {
|
2020-01-15 06:18:41 +08:00
|
|
|
UID id;
|
|
|
|
int64_t index;
|
|
|
|
Version txnId;
|
|
|
|
std::tie(id, index, txnId) = decodeRestoreApplierKey(kv.key);
|
2019-11-04 09:16:21 +08:00
|
|
|
TraceEvent(SevError, "FastRestore_ApplyTxnStateNotClean")
|
2020-01-15 06:18:41 +08:00
|
|
|
.detail("Applier", id)
|
|
|
|
.detail("BatchIndex", index)
|
|
|
|
.detail("ResidueTxnID", txnId);
|
2019-11-04 09:16:21 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
} catch (Error& e) {
|
|
|
|
wait(tr->onError(e));
|
|
|
|
}
|
|
|
|
}
|
2019-10-11 08:24:03 +08:00
|
|
|
|
|
|
|
loop { // Transaction retry loop
|
2019-05-10 11:55:44 +08:00
|
|
|
try {
|
2019-10-11 08:24:03 +08:00
|
|
|
// Check if the transaction succeeds
|
2019-10-15 07:18:54 +08:00
|
|
|
if (progress.hasError()) {
|
2019-10-11 08:24:03 +08:00
|
|
|
tr->reset();
|
|
|
|
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
|
|
|
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
2020-01-15 06:18:41 +08:00
|
|
|
Optional<Value> txnSucceeded =
|
|
|
|
wait(tr->get(restoreApplierKeyFor(applierID, batchIndex, progress.curTxnId)));
|
2019-10-11 08:24:03 +08:00
|
|
|
if (!txnSucceeded.present()) {
|
2019-10-15 05:57:15 +08:00
|
|
|
progress.rollback();
|
2019-10-11 08:24:03 +08:00
|
|
|
continue;
|
|
|
|
} else {
|
2019-10-11 08:36:38 +08:00
|
|
|
TraceEvent(SevWarn, "FastRestore_ApplyTxnError")
|
2019-10-15 05:57:15 +08:00
|
|
|
.detail("TxnStatusSucceeded", progress.curTxnId)
|
2020-01-15 06:18:41 +08:00
|
|
|
.detail("ApplierApplyToDB", applierID)
|
2019-10-15 05:57:15 +08:00
|
|
|
.detail("CurIteratorVersion", progress.curItInCurTxn->first)
|
|
|
|
.detail("CurrentIteratorMutations", progress.curItInCurTxn->second.size())
|
|
|
|
.detail("CurrentIndexInSucceedTxn", progress.curIndexInCurTxn)
|
|
|
|
.detail("NumIncludedAtomicOps", progress.numAtomicOps);
|
|
|
|
// Txn succeeded and exectue the same logic when txn succeeds
|
2019-10-11 08:24:03 +08:00
|
|
|
}
|
|
|
|
} else { // !lastTxnHasError: accumulate mutations in a txn
|
|
|
|
tr->reset();
|
|
|
|
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
|
|
|
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
2019-10-11 08:36:38 +08:00
|
|
|
TraceEvent("FastRestore_ApplierTxn")
|
2020-01-15 06:18:41 +08:00
|
|
|
.detail("ApplierApplyToDB", applierID)
|
2019-10-15 05:57:15 +08:00
|
|
|
.detail("TxnId", progress.curTxnId)
|
2019-10-23 14:24:20 +08:00
|
|
|
.detail("CurrentIndexInCurrentTxn", progress.curIndexInCurTxn)
|
2019-10-15 05:57:15 +08:00
|
|
|
.detail("CurrentIteratorMutations", progress.curItInCurTxn->second.size())
|
|
|
|
.detail("Version", progress.curItInCurTxn->first);
|
2019-10-11 08:24:03 +08:00
|
|
|
|
|
|
|
// restoreApplierKeyFor(self->id(), curTxnId) to tell if txn succeeds at an unknown error
|
2020-01-15 06:18:41 +08:00
|
|
|
tr->set(restoreApplierKeyFor(applierID, batchIndex, progress.curTxnId), restoreApplierTxnValue);
|
2019-10-11 08:24:03 +08:00
|
|
|
|
2019-10-15 05:57:15 +08:00
|
|
|
while (1) { // Loop: Accumulate mutations in a transaction
|
2019-10-15 07:38:01 +08:00
|
|
|
MutationRef m = progress.getCurrentMutation();
|
2019-10-11 08:36:38 +08:00
|
|
|
|
2019-09-04 06:50:21 +08:00
|
|
|
if (m.type >= MutationRef::Type::SetValue && m.type <= MutationRef::Type::MAX_ATOMIC_OP) {
|
2019-05-10 11:55:44 +08:00
|
|
|
typeStr = typeString[m.type];
|
2019-10-11 08:36:38 +08:00
|
|
|
} else {
|
2019-06-05 13:17:08 +08:00
|
|
|
TraceEvent(SevError, "FastRestore").detail("InvalidMutationType", m.type);
|
2019-05-10 11:55:44 +08:00
|
|
|
}
|
|
|
|
|
2019-12-11 14:55:40 +08:00
|
|
|
TraceEvent(SevFRMutationInfo, "FastRestore")
|
2020-01-15 06:18:41 +08:00
|
|
|
.detail("ApplierApplyToDB", applierID)
|
2019-10-23 14:24:20 +08:00
|
|
|
.detail("Version", progress.curItInCurTxn->first)
|
|
|
|
.detail("Index", progress.curIndexInCurTxn)
|
|
|
|
.detail("Mutation", m.toString())
|
2019-10-24 11:49:14 +08:00
|
|
|
.detail("MutationSize", m.expectedSize())
|
|
|
|
.detail("TxnSize", progress.transactionSize);
|
2019-08-02 08:00:13 +08:00
|
|
|
if (m.type == MutationRef::SetValue) {
|
2019-05-10 11:55:44 +08:00
|
|
|
tr->set(m.param1, m.param2);
|
2019-08-02 08:00:13 +08:00
|
|
|
} else if (m.type == MutationRef::ClearRange) {
|
2019-05-10 11:55:44 +08:00
|
|
|
KeyRangeRef mutationRange(m.param1, m.param2);
|
|
|
|
tr->clear(mutationRange);
|
2019-08-02 08:00:13 +08:00
|
|
|
} else if (isAtomicOp((MutationRef::Type)m.type)) {
|
2019-05-10 11:55:44 +08:00
|
|
|
tr->atomicOp(m.param1, m.param2, m.type);
|
2019-10-15 05:57:15 +08:00
|
|
|
progress.numAtomicOps++;
|
2019-05-10 11:55:44 +08:00
|
|
|
} else {
|
2019-08-02 08:00:13 +08:00
|
|
|
TraceEvent(SevError, "FastRestore")
|
2019-10-15 05:57:15 +08:00
|
|
|
.detail("UnhandledMutationType", m.type)
|
|
|
|
.detail("TypeName", typeStr);
|
2019-05-10 11:55:44 +08:00
|
|
|
}
|
2019-10-11 08:24:03 +08:00
|
|
|
|
2019-10-15 05:57:15 +08:00
|
|
|
progress.transactionSize += m.expectedSize();
|
2019-08-02 08:00:13 +08:00
|
|
|
|
2019-10-23 14:24:20 +08:00
|
|
|
progress.nextMutation(); // Prepare for the next mutation
|
|
|
|
// commit per transactionBatchSizeThreshold bytes; and commit does not cross version boundary
|
2019-10-24 04:36:19 +08:00
|
|
|
if (progress.shouldCommit()) {
|
2019-10-11 08:24:03 +08:00
|
|
|
break; // Got enough mutation in the txn
|
2019-10-11 08:36:38 +08:00
|
|
|
}
|
2019-05-15 08:39:44 +08:00
|
|
|
}
|
2019-10-11 08:36:38 +08:00
|
|
|
} // !lastTxnHasError
|
2019-10-11 08:24:03 +08:00
|
|
|
|
|
|
|
// Commit the txn and prepare the starting point for next txn
|
2019-10-15 05:57:15 +08:00
|
|
|
if (progress.shouldCommit()) {
|
2019-05-10 11:55:44 +08:00
|
|
|
wait(tr->commit());
|
|
|
|
}
|
2019-10-23 14:24:20 +08:00
|
|
|
|
2019-10-15 07:18:54 +08:00
|
|
|
if (progress.isDone()) { // Are all mutations processed?
|
2019-10-11 08:24:03 +08:00
|
|
|
break;
|
|
|
|
}
|
2019-10-15 05:57:15 +08:00
|
|
|
progress.nextTxn();
|
2019-08-02 08:00:13 +08:00
|
|
|
} catch (Error& e) {
|
2019-10-11 08:36:38 +08:00
|
|
|
TraceEvent(SevWarnAlways, "FastRestore_ApplyTxnError")
|
|
|
|
.detail("TxnStatus", "?")
|
2020-01-15 06:18:41 +08:00
|
|
|
.detail("ApplierApplyToDB", applierID)
|
2019-10-15 05:57:15 +08:00
|
|
|
.detail("TxnId", progress.curTxnId)
|
2019-10-24 04:57:40 +08:00
|
|
|
.detail("CurrentIndexInCurrentTxn", progress.curIndexInCurTxn)
|
2019-10-15 05:57:15 +08:00
|
|
|
.detail("Version", progress.curItInCurTxn->first)
|
2019-10-15 03:52:13 +08:00
|
|
|
.error(e, true);
|
2019-10-15 05:57:15 +08:00
|
|
|
progress.lastTxnHasError = true;
|
2019-10-11 08:24:03 +08:00
|
|
|
// if (e.code() == commit_unknown_result) {
|
|
|
|
// lastTxnHasError = true;
|
|
|
|
// }
|
2019-05-10 11:55:44 +08:00
|
|
|
wait(tr->onError(e));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-11 08:36:38 +08:00
|
|
|
TraceEvent("FastRestore_ApplierTxn")
|
2020-01-15 06:18:41 +08:00
|
|
|
.detail("ApplierApplyToDBFinished", applierID)
|
2019-10-15 05:57:15 +08:00
|
|
|
.detail("CleanupCurTxnIds", progress.curTxnId);
|
2019-10-11 08:24:03 +08:00
|
|
|
// clean up txn ids
|
|
|
|
loop {
|
|
|
|
try {
|
|
|
|
tr->reset();
|
|
|
|
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
|
|
|
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
2019-11-04 09:16:21 +08:00
|
|
|
// Clear txnIds in [0, progress.curTxnId). We add 100 to curTxnId just to be safe.
|
2020-01-15 06:18:41 +08:00
|
|
|
tr->clear(KeyRangeRef(restoreApplierKeyFor(applierID, batchIndex, 0),
|
|
|
|
restoreApplierKeyFor(applierID, batchIndex, progress.curTxnId + 100)));
|
2019-10-11 08:24:03 +08:00
|
|
|
wait(tr->commit());
|
|
|
|
break;
|
|
|
|
} catch (Error& e) {
|
|
|
|
wait(tr->onError(e));
|
|
|
|
}
|
|
|
|
}
|
2019-12-04 04:58:11 +08:00
|
|
|
// House cleaning
|
2020-01-15 06:18:41 +08:00
|
|
|
batchData->kvOps.clear();
|
|
|
|
TraceEvent("FastRestore_ApplierTxn").detail("ApplierApplyToDBFinished", applierID);
|
2019-05-10 11:55:44 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
return Void();
|
|
|
|
}
|
2019-05-30 04:26:17 +08:00
|
|
|
|
2019-08-02 08:00:13 +08:00
|
|
|
// Serves an applyToDB request for version batch req.batchIndex.
//
// Waits until finishedBatch has reached req.batchIndex - 1. If finishedBatch is exactly at that value,
// the batch's applyToDB actor is started (unless the batch already has one), awaited, and finishedBatch
// is advanced to req.batchIndex. In every case a RestoreCommonReply carrying this applier's id is sent.
ACTOR static Future<Void> handleApplyToDBRequest(RestoreVersionBatchRequest req, Reference<RestoreApplierData> self,
                                                 Database cx) {
	// Batch i has to be applied before batch (i+1).
	wait(self->finishedBatch.whenAtLeast(req.batchIndex - 1));

	if (self->finishedBatch.get() != req.batchIndex - 1) {
		// finishedBatch is no longer at the previous batch, so this request has nothing left to apply;
		// only acknowledge it.
		req.reply.send(RestoreCommonReply(self->id()));
		return Void();
	}

	Reference<ApplierBatchData> targetBatch = self->batch[req.batchIndex];
	ASSERT(targetBatch.isValid());

	const bool applierAlreadyStarted = targetBatch->dbApplier.present();
	TraceEvent("FastRestore")
	    .detail("ApplierApplyToDB", self->id())
	    .detail("VersionBatchIndex", req.batchIndex)
	    .detail("DBApplierPresent", applierAlreadyStarted);

	// Start the apply actor only once per batch; later requests for the same batch wait on the same future.
	if (!applierAlreadyStarted) {
		targetBatch->dbApplier = applyToDB(self->id(), req.batchIndex, targetBatch, cx);
	}
	ASSERT(targetBatch->dbApplier.present());

	wait(targetBatch->dbApplier.get());

	// Several invocations of this actor can be waiting on req.batchIndex - 1 at the same time.
	// Only advance finishedBatch if it has not been advanced in the meantime.
	if (self->finishedBatch.get() == req.batchIndex - 1) {
		self->finishedBatch.set(req.batchIndex);
	}

	req.reply.send(RestoreCommonReply(self->id()));
	return Void();
}
|