/*
 * BackupWorker.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fdbclient/BackupContainer.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/SystemData.h"
#include "fdbserver/BackupInterface.h"
#include "fdbserver/LogProtocolMessage.h"
#include "fdbserver/LogSystem.h"
#include "fdbserver/ServerDBInfo.h"
#include "fdbserver/WaitFailure.h"
#include "fdbserver/WorkerInterface.actor.h"
#include "flow/Error.h"

#include "flow/actorcompiler.h" // This must be the last #include.

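// A single message peeked from the TLogs, together with its commit version,
// routing tags, and the arena that owns the message memory.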
struct VersionedMessage {
    LogMessageVersion version;
    StringRef message;
    VectorRef<Tag> tags;
    Arena arena; // Keep a reference to the memory containing the message

    VersionedMessage(LogMessageVersion v, StringRef m, const VectorRef<Tag>& t, const Arena& a)
      : version(v), message(m), tags(t), arena(a) {}
    const Version getVersion() const { return version.version; }
    const uint32_t getSubVersion() const { return version.sub; }
};

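// Per-worker state for a backup worker: the tag and version range it is
// responsible for, the messages pulled so far, and the backup container the
// mutations are written to.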
struct BackupData {
    const UID myId;
    const Tag tag; // LogRouter tag for this worker, i.e., (-2, i)
    const Version startVersion;
    const Optional<Version> endVersion; // old epoch's end version (inclusive), or empty for current epoch
    const LogEpoch recruitedEpoch;
    const LogEpoch backupEpoch;
    Version minKnownCommittedVersion;
    Version savedVersion, lastSeenVersion;
    AsyncVar<Reference<ILogSystem>> logSystem;
    Database cx;
    std::vector<VersionedMessage> messages;
    Reference<IBackupContainer> container;
    AsyncVar<bool> pullFinished;

    CounterCollection cc;
    Future<Void> logger;

    explicit BackupData(Reference<AsyncVar<ServerDBInfo>> db, const InitializeBackupRequest& req)
      : myId(req.reqId), tag(req.routerTag), startVersion(req.startVersion), endVersion(req.endVersion),
        recruitedEpoch(req.recruitedEpoch), backupEpoch(req.backupEpoch), minKnownCommittedVersion(invalidVersion),
        savedVersion(invalidVersion), lastSeenVersion(invalidVersion), cc("BackupWorker", myId.toString()) {
        cx = openDBOnServer(db, TaskPriority::DefaultEndpoint, true, true);
        pullFinished.set(false);

        specialCounter(cc, "SavedVersion", [this]() { return this->savedVersion; });
        specialCounter(cc, "MinKnownCommittedVersion", [this]() { return this->minKnownCommittedVersion; });
        specialCounter(cc, "MsgQ", [this]() { return this->messages.size(); });
        logger = traceCounters("BackupWorkerMetrics", myId, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc,
                               "BackupWorkerMetrics");
        if (g_network->isSimulated()) {
            container = IBackupContainer::openContainer("file://simfdb/mutation_backups/");
        } else {
            // TODO: use blobstore URL passed from somewhere.
            //ASSERT(false);
            //container = IBackupContainer::openContainer("blobstore://");
        }
    }

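    // Pops the worker's pseudo tag up to savedVersion so TLogs can reclaim memory,
    // deferring the pop while an older backup epoch has not finished popping yet.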
    void pop() {
        const LogEpoch oldest = logSystem.get()->getOldestBackupEpoch();
        if (backupEpoch > oldest) {
            // Defer pop if old epoch hasn't finished popping yet.
            TraceEvent("BackupWorkerPopDeferred", myId)
                .suppressFor(1.0)
                .detail("BackupEpoch", backupEpoch)
                .detail("OldestEpoch", oldest)
                .detail("Version", savedVersion);
            return;
        }
        const Tag popTag = logSystem.get()->getPseudoPopTag(tag, ProcessClass::BackupClass);
        logSystem.get()->pop(savedVersion, popTag);
    }

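    // Discards any queued messages past endVersion; those versions are handled
    // by the workers of the next epoch.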
    void eraseMessagesAfterEndVersion() {
        ASSERT(endVersion.present());
        const Version ver = endVersion.get();
        while (!messages.empty()) {
            if (messages.back().getVersion() > ver) {
                messages.pop_back();
            } else {
                return;
            }
        }
    }
};

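// Waits until a backup job has been submitted, i.e., backupStartedKey is set.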
ACTOR Future<Void> monitorBackupStarted(BackupData* self) {
    loop {
        state ReadYourWritesTransaction tr(self->cx);

        loop {
            try {
                tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
                tr.setOption(FDBTransactionOptions::LOCK_AWARE);
                Optional<Standalone<StringRef>> value = wait(tr.get(backupStartedKey));
                if (value.present()) return Void();

                state Future<Void> watchFuture = tr.watch(backupStartedKey);
                wait(tr.commit());
                wait(watchFuture);
                break;
            } catch (Error& e) {
                wait(tr.onError(e));
            }
        }
    }
}

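// Persists this worker's progress (backup epoch, version, tag) to the system
// keyspace so that progress survives worker restarts and recoveries.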
ACTOR Future<Void> saveProgress(BackupData* self, Version backupVersion) {
    state Transaction tr(self->cx);
    state Key key = backupProgressKeyFor(self->myId);

    loop {
        try {
            tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
            tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
            tr.setOption(FDBTransactionOptions::LOCK_AWARE);

            WorkerBackupStatus status(self->backupEpoch, backupVersion, self->tag);
            tr.set(key, backupProgressValue(status));
            tr.addReadConflictRange(singleKeyRange(key));
            wait(tr.commit());
            return Void();
        } catch (Error& e) {
            wait(tr.onError(e));
        }
    }
}

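// Formats a list of tags as a comma-separated string for tracing.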
static std::string tagsToString(const VectorRef<Tag>& tags) {
    std::string s;
    bool first = true;
    for (auto t = tags.begin(); t != tags.end(); t++) {
        if (first) {
            first = false;
        } else {
            s.append(", ");
        }
        s.append((*t).toString());
    }
    return s;
}

// Returns true if the message is a mutation that should be backed up, i.e.,
// its key is either in the normal key space or is the metadataVersionKey.
bool isBackupMessage(const VersionedMessage& msg) {
    for (Tag tag : msg.tags) {
        if (tag.locality == tagLocalitySpecial || tag.locality == tagLocalityTxs) {
            return false; // skip Txs mutations
        }
    }

    BinaryReader reader(msg.message.begin(), msg.message.size(), AssumeVersion(currentProtocolVersion));

    // Return false for LogProtocolMessage.
    if (LogProtocolMessage::isNextIn(reader)) return false;

    MutationRef m;
    reader >> m;

    // Skip mutations on system keys, except the metadataVersionKey.
    if (!normalKeys.contains(m.param1) && m.param1 != metadataVersionKey) {
        return false;
    }

    return true;
}

// Return a block of contiguous padding bytes, growing if needed.
static Value makePadding(int size) {
    static Value pad;
    if (pad.size() < size) {
        pad = makeString(size);
        memset(mutateString(pad), '\xff', pad.size());
    }

    return pad.substr(0, size);
}

// Saves messages in the range [0, numMsg) to a file and then removes them from
// the queue. The file is a sequence of (Version, sub#, msgSize, message) records;
// when a record would not fit in the current block, the rest of the block is
// padded with 0xFF bytes and a new block is started.
ACTOR Future<Void> saveMutationsToFile(BackupData* self, Version popVersion, int numMsg) {
    state int blockSize = SERVER_KNOBS->BACKUP_FILE_BLOCK_BYTES;
    state Reference<IBackupFile> logFile =
        wait(self->container->writeTaggedLogFile(self->messages[0].getVersion(), popVersion, blockSize, self->tag.id));
    TraceEvent("OpenMutationFile", self->myId)
        .detail("StartVersion", self->messages[0].getVersion())
        .detail("EndVersion", popVersion)
        .detail("BlockSize", blockSize)
        .detail("TagId", self->tag.id)
        .detail("File", logFile->getFileName());
    state int idx = 0;
    state int64_t blockEnd = 0;
    for (; idx < numMsg; idx++) {
        if (!isBackupMessage(self->messages[idx])) continue;

        const int bytes = sizeof(Version) + sizeof(uint32_t) + sizeof(int) + self->messages[idx].message.size();
        // Start a new block if needed
        if (logFile->size() + bytes > blockEnd) {
            // Write padding if needed
            const int bytesLeft = blockEnd - logFile->size();
            if (bytesLeft > 0) {
                state Value paddingFFs = makePadding(bytesLeft);
                wait(logFile->append(paddingFFs.begin(), bytesLeft));
            }

            blockEnd += blockSize;
            // TODO: add block header
        }

        // Write version.version, version.sub, and msgSize in big endian. The decoder
        // treats 0xFF as the end of a block, so a little-endian version could easily
        // be mistaken for the end. In contrast, a big-endian version almost guarantees
        // the first byte is not 0xFF (it should always be 0x00).
        state int msgSize = self->messages[idx].message.size();
        BinaryWriter wr(Unversioned());
        wr << bigEndian64(self->messages[idx].version.version)
           << bigEndian32(self->messages[idx].version.sub)
           << bigEndian32(msgSize);
        Standalone<StringRef> buf = wr.toValue();
        wait(logFile->append((void*)buf.begin(), buf.size()));
        wait(logFile->append(self->messages[idx].message.begin(), msgSize));
    }

    self->messages.erase(self->messages.begin(), self->messages.begin() + numMsg);
    wait(logFile->finish());
    TraceEvent("CloseMutationFile", self->myId)
        .detail("FileSize", logFile->size())
        .detail("TagId", self->tag.id)
        .detail("File", logFile->getFileName());

    return Void();
}

// Uploads self->messages to cloud storage and updates savedVersion.
ACTOR Future<Void> uploadData(BackupData* self) {
    state Version popVersion = invalidVersion;

    loop {
        if (self->endVersion.present() && self->savedVersion >= self->endVersion.get()) {
            self->messages.clear();
            return Void();
        }

        // FIXME: knobify the delay of 10s. This delay is sensitive, as it is the
        // lag TLog might have. Changing to 20s may fail consistency check.
        state Future<Void> uploadDelay = delay(10);

        if (self->messages.empty()) {
            // Even though messages is empty, we still want to advance popVersion.
            popVersion = std::max(popVersion, self->lastSeenVersion);
        } else {
            const Version maxPopVersion =
                self->endVersion.present() ? self->endVersion.get() : self->minKnownCommittedVersion;
            int numMsg = 0;
            for (const auto& message : self->messages) {
                if (message.getVersion() > maxPopVersion) break;
                popVersion = std::max(popVersion, message.getVersion());
                numMsg++;
            }
            if (numMsg > 0) {
                wait(saveMutationsToFile(self, popVersion, numMsg));
            }
        }
        if (self->pullFinished.get() && self->messages.empty()) {
            // Advance popVersion to the endVersion to avoid a gap between the last
            // message version and the endVersion.
            popVersion = self->endVersion.get();
        }

        if (popVersion > self->savedVersion) {
            wait(saveProgress(self, popVersion));
            TraceEvent("BackupWorkerSavedProgress", self->myId)
                .detail("Tag", self->tag.toString())
                .detail("Version", popVersion)
                .detail("MsgQ", self->messages.size());
            self->savedVersion = std::max(popVersion, self->savedVersion);
            self->pop();
        }

        if (!self->pullFinished.get()) {
            wait(uploadDelay || self->pullFinished.onChange());
        }
    }
}

// Pulls data from TLog servers using LogRouter tag.
ACTOR Future<Void> pullAsyncData(BackupData* self) {
    state Future<Void> logSystemChange = Void();
    state Reference<ILogSystem::IPeekCursor> r;
    state Version tagAt = self->startVersion;
    state Version lastVersion = 0;

    loop {
        loop choose {
            when (wait(r ? r->getMore(TaskPriority::TLogCommit) : Never())) {
                break;
            }
            when (wait(logSystemChange)) {
                if (self->logSystem.get()) {
                    r = self->logSystem.get()->peekLogRouter(self->myId, tagAt, self->tag);
                } else {
                    r = Reference<ILogSystem::IPeekCursor>();
                }
                logSystemChange = self->logSystem.onChange();
            }
        }
        self->minKnownCommittedVersion = std::max(self->minKnownCommittedVersion, r->getMinKnownCommittedVersion());

        // Note we aggressively peek (uncommitted) messages, but only committed
        // messages/mutations will be flushed to disk/blob in uploadData().
        while (r->hasMessage()) {
            self->messages.emplace_back(r->version(), r->getMessage(), r->getTags(), r->arena());
            r->nextMessage();
        }

        tagAt = std::max(r->version().version, lastVersion);
        self->lastSeenVersion = std::max(tagAt, self->lastSeenVersion);
        TraceEvent("BackupWorkerGot", self->myId).suppressFor(1.0).detail("V", tagAt);
        if (self->endVersion.present() && tagAt > self->endVersion.get()) {
            self->eraseMessagesAfterEndVersion();
            TraceEvent("BackupWorkerFinishPull", self->myId)
                .detail("Tag", self->tag.toString())
                .detail("VersionGot", tagAt)
                .detail("EndVersion", self->endVersion.get())
                .detail("MsgQ", self->messages.size());
            self->pullFinished.set(true);
            return Void();
        }
        wait(yield());
    }
}

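// Before a backup job is submitted, periodically fetches the latest committed
// version via GRV and pops the worker's tag so TLogs do not accumulate data;
// once backupStartedKey is set, switches to pulling mutations from the TLogs.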
ACTOR Future<Void> monitorBackupKeyOrPullData(BackupData* self) {
    loop {
        state Future<Void> started = monitorBackupStarted(self);

        loop {
            GetReadVersionRequest request(1, GetReadVersionRequest::PRIORITY_DEFAULT |
                                                 GetReadVersionRequest::FLAG_USE_MIN_KNOWN_COMMITTED_VERSION);

            choose {
                when(wait(started)) { break; }
                when(wait(self->cx->onMasterProxiesChanged())) {}
                when(GetReadVersionReply reply = wait(loadBalance(self->cx->getMasterProxies(false),
                                                                  &MasterProxyInterface::getConsistentReadVersion,
                                                                  request, self->cx->taskID))) {
                    self->savedVersion = std::max(reply.version, self->savedVersion);
                    self->minKnownCommittedVersion = std::max(reply.version, self->minKnownCommittedVersion);
                    self->pop(); // Pop while the worker is in this NOOP state.
                    wait(delay(SERVER_KNOBS->BACKUP_NOOP_POP_DELAY, self->cx->taskID));
                }
            }
        }

        TraceEvent("BackupWorkerStartPullData", self->myId);
        wait(pullAsyncData(self));
    }
}

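// Monitors the server DB info and throws worker_removed() once a newer recovery
// has displaced this worker.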
ACTOR Future<Void> checkRemoved(Reference<AsyncVar<ServerDBInfo>> db, LogEpoch recoveryCount,
                                BackupData* self) {
    loop {
        bool isDisplaced =
            db->get().recoveryCount > recoveryCount && db->get().recoveryState != RecoveryState::UNINITIALIZED;
        if (isDisplaced) {
            TraceEvent("BackupWorkerDisplaced", self->myId)
                .detail("RecoveryCount", recoveryCount)
                .detail("SavedVersion", self->savedVersion)
                .detail("BackupWorkers", describe(db->get().logSystemConfig.tLogs))
                .detail("DBRecoveryCount", db->get().recoveryCount)
                .detail("RecoveryState", (int)db->get().recoveryState);
            throw worker_removed();
        }
        wait(db->onChange());
    }
}

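// Main entry point for a backup worker: starts the monitor/pull and upload
// actors, tracks log system changes, and exits once all data for its version
// range has been saved and the master has been notified.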
ACTOR Future<Void> backupWorker(BackupInterface interf, InitializeBackupRequest req,
                                Reference<AsyncVar<ServerDBInfo>> db) {
    state BackupData self(db, req);
    state PromiseStream<Future<Void>> addActor;
    state Future<Void> error = actorCollection(addActor.getFuture());
    state Future<Void> dbInfoChange = Void();

    TraceEvent("BackupWorkerStart", self.myId)
        .detail("Tag", req.routerTag.toString())
        .detail("StartVersion", req.startVersion)
        .detail("EndVersion", req.endVersion.present() ? req.endVersion.get() : -1)
        .detail("LogEpoch", req.recruitedEpoch)
        .detail("BackupEpoch", req.backupEpoch);
    try {
        addActor.send(monitorBackupKeyOrPullData(&self));
        addActor.send(checkRemoved(db, req.recruitedEpoch, &self));
        addActor.send(waitFailureServer(interf.waitFailure.getFuture()));

        state Future<Void> done = uploadData(&self);

        loop choose {
            when(wait(dbInfoChange)) {
                dbInfoChange = db->onChange();
                Reference<ILogSystem> ls = ILogSystem::fromServerDBInfo(self.myId, db->get(), true);
                bool hasPseudoLocality = ls.isValid() && ls->hasPseudoLocality(tagLocalityBackup);
                LogEpoch oldestBackupEpoch = 0;
                if (hasPseudoLocality) {
                    self.logSystem.set(ls);
                    self.pop();
                    oldestBackupEpoch = ls->getOldestBackupEpoch();
                }
                TraceEvent("BackupWorkerLogSystem", self.myId)
                    .detail("HasBackupLocality", hasPseudoLocality)
                    .detail("OldestBackupEpoch", oldestBackupEpoch)
                    .detail("Tag", self.tag.toString());
            }
            when(wait(done)) {
                TraceEvent("BackupWorkerDone", self.myId).detail("BackupEpoch", self.backupEpoch);
                // Notify the master so that this worker can be removed from the log system;
                // then this worker (doing an old epoch's unfinished work) can safely exit.
                wait(brokenPromiseToNever(db->get().master.notifyBackupWorkerDone.getReply(
                    BackupWorkerDoneRequest(self.myId, self.backupEpoch))));
                break;
            }
            when(wait(error)) {}
        }
    } catch (Error& e) {
        TraceEvent("BackupWorkerTerminated", self.myId).error(e, true);
        if (e.code() != error_code_actor_cancelled && e.code() != error_code_worker_removed) {
            throw;
        }
    }
    return Void();
}