This commit is contained in:
Jingyu Zhou 2025-03-21 17:00:42 -07:00 committed by GitHub
commit 9b08a3a1f4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 940 additions and 49 deletions

View File

@ -7569,28 +7569,26 @@ public:
// When set to true, gives an inconsistent snapshot, thus not recommended
// beginVersions: restore's begin version for each range
// randomUid: the UID for lock the database
ACTOR static Future<Version> restore(
FileBackupAgent* backupAgent,
Database cx,
Optional<Database> cxOrig,
Key tagName,
Key url,
Optional<std::string> proxy,
Standalone<VectorRef<KeyRangeRef>> ranges,
Standalone<VectorRef<Version>> beginVersions,
WaitForComplete waitForComplete,
Version targetVersion,
Verbose verbose,
Key addPrefix,
Key removePrefix,
LockDB lockDB,
UnlockDB unlockDB,
OnlyApplyMutationLogs onlyApplyMutationLogs,
InconsistentSnapshotOnly inconsistentSnapshotOnly,
Optional<std::string> encryptionKeyFileName,
UID randomUid,
Optional<std::string> blobManifestUrl,
TransformPartitionedLog transformPartitionedLog = TransformPartitionedLog::False) {
ACTOR static Future<Version> restore(FileBackupAgent* backupAgent,
Database cx,
Optional<Database> cxOrig,
Key tagName,
Key url,
Optional<std::string> proxy,
Standalone<VectorRef<KeyRangeRef>> ranges,
Standalone<VectorRef<Version>> beginVersions,
WaitForComplete waitForComplete,
Version targetVersion,
Verbose verbose,
Key addPrefix,
Key removePrefix,
LockDB lockDB,
UnlockDB unlockDB,
OnlyApplyMutationLogs onlyApplyMutationLogs,
InconsistentSnapshotOnly inconsistentSnapshotOnly,
Optional<std::string> encryptionKeyFileName,
UID randomUid,
Optional<std::string> blobManifestUrl) {
// The restore command line tool won't allow ranges to be empty, but correctness workloads somehow might.
if (ranges.empty()) {
throw restore_error();
@ -7655,7 +7653,7 @@ public:
beginVersion,
randomUid,
blobManifestUrl,
transformPartitionedLog));
TransformPartitionedLog(desc.partitioned)));
wait(tr->commit());
break;
} catch (Error& e) {
@ -7944,8 +7942,7 @@ Future<Version> FileBackupAgent::restore(Database cx,
OnlyApplyMutationLogs onlyApplyMutationLogs,
InconsistentSnapshotOnly inconsistentSnapshotOnly,
Optional<std::string> const& encryptionKeyFileName,
Optional<std::string> blobManifestUrl,
TransformPartitionedLog transformPartitionedLog) {
Optional<std::string> blobManifestUrl) {
return FileBackupAgentImpl::restore(this,
cx,
cxOrig,
@ -7965,8 +7962,7 @@ Future<Version> FileBackupAgent::restore(Database cx,
inconsistentSnapshotOnly,
encryptionKeyFileName,
deterministicRandom()->randomUniqueID(),
blobManifestUrl,
transformPartitionedLog);
blobManifestUrl);
}
Future<Version> FileBackupAgent::restore(Database cx,
@ -7986,8 +7982,7 @@ Future<Version> FileBackupAgent::restore(Database cx,
InconsistentSnapshotOnly inconsistentSnapshotOnly,
Version beginVersion,
Optional<std::string> const& encryptionKeyFileName,
Optional<std::string> blobManifestUrl,
TransformPartitionedLog transformPartitionedLog) {
Optional<std::string> blobManifestUrl) {
Standalone<VectorRef<Version>> beginVersions;
for (auto i = 0; i < ranges.size(); ++i) {
beginVersions.push_back(beginVersions.arena(), beginVersion);
@ -8009,8 +8004,7 @@ Future<Version> FileBackupAgent::restore(Database cx,
onlyApplyMutationLogs,
inconsistentSnapshotOnly,
encryptionKeyFileName,
blobManifestUrl,
transformPartitionedLog);
blobManifestUrl);
}
Future<Version> FileBackupAgent::restore(Database cx,

View File

@ -25,14 +25,15 @@
#elif !defined(FDBCLIENT_BACKUP_AGENT_ACTOR_H)
#define FDBCLIENT_BACKUP_AGENT_ACTOR_H
#include <ctime>
#include <climits>
#include "flow/flow.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/TaskBucket.h"
#include "fdbclient/Notified.h"
#include "flow/IAsyncFile.h"
#include "fdbclient/KeyBackedTypes.actor.h"
#include <ctime>
#include <climits>
#include "fdbclient/BackupContainer.h"
#include "flow/actorcompiler.h" // has to be last include
@ -205,8 +206,7 @@ public:
OnlyApplyMutationLogs = OnlyApplyMutationLogs::False,
InconsistentSnapshotOnly = InconsistentSnapshotOnly::False,
Optional<std::string> const& encryptionKeyFileName = {},
Optional<std::string> blobManifestUrl = {},
TransformPartitionedLog transformPartitionedLog = TransformPartitionedLog::False);
Optional<std::string> blobManifestUrl = {});
// this method will construct range and version vectors and then call restore()
Future<Version> restore(Database cx,
@ -245,8 +245,7 @@ public:
InconsistentSnapshotOnly inconsistentSnapshotOnly = InconsistentSnapshotOnly::False,
Version beginVersion = ::invalidVersion,
Optional<std::string> const& encryptionKeyFileName = {},
Optional<std::string> blobManifestUrl = {},
TransformPartitionedLog transformPartitionedLog = TransformPartitionedLog::False);
Optional<std::string> blobManifestUrl = {});
Future<Version> atomicRestore(Database cx,
Key tagName,

View File

@ -646,7 +646,8 @@ T simulate(const T& in) {
}
ACTOR Future<Void> runBackup(Reference<IClusterConnectionRecord> connRecord) {
state std::vector<Future<Void>> agentFutures;
state Future<Void> agentFuture;
state FileBackupAgent fileAgent;
while (g_simulator->backupAgents == ISimulator::BackupAgentType::WaitForType) {
wait(delay(1.0));
@ -655,17 +656,15 @@ ACTOR Future<Void> runBackup(Reference<IClusterConnectionRecord> connRecord) {
if (g_simulator->backupAgents == ISimulator::BackupAgentType::BackupToFile) {
Database cx = Database::createDatabase(connRecord, ApiVersion::LATEST_VERSION);
state FileBackupAgent fileAgent;
agentFutures.push_back(fileAgent.run(
cx, 1.0 / CLIENT_KNOBS->BACKUP_AGGREGATE_POLL_RATE, CLIENT_KNOBS->SIM_BACKUP_TASKS_PER_AGENT));
agentFuture =
fileAgent.run(cx, 1.0 / CLIENT_KNOBS->BACKUP_AGGREGATE_POLL_RATE, CLIENT_KNOBS->SIM_BACKUP_TASKS_PER_AGENT);
while (g_simulator->backupAgents == ISimulator::BackupAgentType::BackupToFile) {
wait(delay(1.0));
}
for (auto it : agentFutures) {
it.cancel();
}
TraceEvent("SimBackupAgentsStopping").log();
agentFuture.cancel();
}
wait(Future<Void>(Never()));

View File

@ -0,0 +1,351 @@
/*
* Backup.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2025 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbclient/ReadYourWrites.h"
#include "fdbrpc/simulator.h"
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/BackupContainer.h"
#include "fdbclient/BackupContainerFileSystem.h"
#include "fdbclient/TenantManagement.actor.h"
#include "fdbserver/Knobs.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "flow/IRandom.h"
#include "flow/actorcompiler.h" // This must be the last #include.
// A workload which only performs backup operations. A separate workload is used to perform restore operations.
struct BackupWorkload : TestWorkload {
static constexpr auto NAME = "Backup";
double backupAfter, restoreAfter, abortAndRestartAfter;
double minBackupAfter;
double backupStartAt, restoreStartAfterBackupFinished, stopDifferentialAfter;
Key backupTag;
bool differentialBackup;
Standalone<VectorRef<KeyRangeRef>> backupRanges;
LockDB locked{ false };
UsePartitionedLog usePartitionedLog{ true };
bool allowPauses;
Optional<std::string> encryptionKeyFileName;
// Reads the workload options and derives the (partly randomized) backup schedule.
//
// Options read: usePartitionedLog, backupAfter, minBackupAfter, restoreAfter, backupTag,
// abortAndRestartAfter, differentialBackup, stopDifferentialAfter, allowPauses,
// restorePrefixesToInclude and encrypted.
//
// NOTE: the order of the deterministicRandom() draws below is part of the behavior (it determines
// the values picked for a given seed), so it must not be reordered.
BackupWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
    locked.set(sharedRandomNumber % 2);
    bool partitioned = getOption(options, "usePartitionedLog"_sr, true);
    usePartitionedLog.set(partitioned);

    backupAfter = getOption(options, "backupAfter"_sr, 10.0);
    // Assign the member. Declaring a local here would shadow it and leave the member uninitialized.
    minBackupAfter = getOption(options, "minBackupAfter"_sr, backupAfter);
    if (backupAfter > minBackupAfter) {
        // Pick the actual start time uniformly in [minBackupAfter, backupAfter).
        backupAfter = deterministicRandom()->random01() * (backupAfter - minBackupAfter) + minBackupAfter;
    }

    restoreAfter = getOption(options, "restoreAfter"_sr, 35.0);
    backupTag = getOption(options, "backupTag"_sr, BackupAgentBase::getDefaultTag());

    // Default: with probability 0.5 a random time between backupAfter and restoreAfter, otherwise 0.
    abortAndRestartAfter =
        getOption(options,
                  "abortAndRestartAfter"_sr,
                  deterministicRandom()->random01() < 0.5
                      ? deterministicRandom()->random01() * (restoreAfter - backupAfter) + backupAfter
                      : 0.0);
    differentialBackup = getOption(options, "differentialBackup"_sr, deterministicRandom()->random01() < 0.5);
    // Default: for a differential backup, a random time between max(abortAndRestartAfter, backupAfter)
    // and restoreAfter; otherwise 0.
    stopDifferentialAfter =
        getOption(options,
                  "stopDifferentialAfter"_sr,
                  differentialBackup ? deterministicRandom()->random01() *
                                               (restoreAfter - std::max(abortAndRestartAfter, backupAfter)) +
                                           std::max(abortAndRestartAfter, backupAfter)
                                     : 0.0);
    allowPauses = getOption(options, "allowPauses"_sr, true);

    // The value is not used in this workload.
    // NOTE(review): presumably still read so that the option counts as consumed - confirm before removing.
    std::vector<std::string> restorePrefixesToInclude =
        getOption(options, "restorePrefixesToInclude"_sr, std::vector<std::string>());

    if (getOption(options, "encrypted"_sr, deterministicRandom()->random01() < 0.1)) {
        encryptionKeyFileName = "simfdb/" + getTestEncryptionFileName();
    }

    TraceEvent("BW_ClientId").detail("Id", wcx.clientId);

    // The whole normal key space is backed up.
    backupRanges.push_back_deep(backupRanges.arena(), normalKeys);
}
// No setup work is needed for this workload; completes immediately.
Future<Void> setup(Database const& cx) override { return Void(); }
// Entry point of the workload. Only the client with id 0 logs the chosen parameters and runs the
// backup actor; every other client finishes immediately.
Future<Void> start(Database const& cx) override {
    if (clientId == 0) {
        TraceEvent(SevInfo, "BW_Param")
            .detail("Locked", locked)
            .detail("BackupAfter", backupAfter)
            .detail("RestoreAfter", restoreAfter)
            .detail("BackupTag", printable(backupTag).c_str())
            .detail("AbortAndRestartAfter", abortAndRestartAfter)
            .detail("DifferentialBackup", differentialBackup)
            .detail("StopDifferentialAfter", stopDifferentialAfter)
            .detail("Encrypted", encryptionKeyFileName.present());
        return _start(cx, this);
    }
    return Void();
}
// This workload performs no verification of its own; always reports success.
Future<bool> check(Database const& cx) override { return true; }
// No metrics are reported by this workload.
void getMetrics(std::vector<PerfMetric>& m) override {}
// Never-ending loop that alternately calls changePause(cx, true) and changePause(cx, false) on the
// given agent. After pausing it waits a random time of up to 30 seconds, after resuming a random
// time of up to 120 seconds. The loop has no exit; it only stops when the returned future is cancelled.
ACTOR static Future<Void> changePaused(Database cx, FileBackupAgent* backupAgent) {
loop {
wait(backupAgent->changePause(cx, true));
TraceEvent("BW_AgentPaused").log();
wait(delay(30 * deterministicRandom()->random01()));
wait(backupAgent->changePause(cx, false));
TraceEvent("BW_AgentResumed").log();
wait(delay(120 * deterministicRandom()->random01()));
}
}
// Never-ending loop that, every 2 seconds, traces whether the agent reports itself active and
// prints the textual and JSON status for `tag` to stdout. Uses its own FileBackupAgent instance.
// The loop has no exit; it only stops when the returned future is cancelled.
ACTOR static Future<Void> statusLoop(Database cx, std::string tag) {
state FileBackupAgent agent;
loop {
bool active = wait(agent.checkActive(cx));
TraceEvent("BW_AgentActivityCheck").detail("IsActive", active);
std::string status = wait(agent.getStatus(cx, ShowErrors::True, tag));
puts(status.c_str());
std::string statusJSON = wait(agent.getStatusJSON(cx, tag));
puts(statusJSON.c_str());
wait(delay(2.0));
}
}
// Runs one backup attempt for `tag`:
//   1. Waits `startDelay` seconds. If startDelay is nonzero (or BUGGIFY fires), aborts whatever
//      backup currently exists on the tag; a backup_unneeded error from the abort is tolerated.
//   2. Submits a new backup of `backupRanges` to file://simfdb/backups/. backup_unneeded and
//      backup_duplicate errors from the submit are tolerated.
//   3. If `stopDifferentialDelay` is nonzero, waits for that timer (it is started on entry, before
//      the start delay) and then discontinues - or, on some paths, aborts - the backup.
//   4. Waits for the backup to finish and traces the final status.
// Any other error is rethrown to the caller.
ACTOR static Future<Void> doBackup(BackupWorkload* self,
double startDelay,
FileBackupAgent* backupAgent,
Database cx,
Key tag,
Standalone<VectorRef<KeyRangeRef>> backupRanges,
double stopDifferentialDelay) {
state UID randomID = nondeterministicRandom()->randomUniqueID();
// Started before the start delay, so the differential timer is measured from actor entry.
state Future<Void> stopDifferentialFuture = delay(stopDifferentialDelay);
wait(delay(startDelay));
if (startDelay || BUGGIFY) {
TraceEvent("BW_DoBackupAbortBackup1", randomID)
.detail("Tag", printable(tag))
.detail("StartDelay", startDelay);
try {
wait(backupAgent->abortBackup(cx, tag.toString()));
} catch (Error& e) {
TraceEvent("BW_DoBackupAbortBackupException", randomID).error(e).detail("Tag", printable(tag));
// backup_unneeded is expected here (nothing to abort); anything else is a real failure.
if (e.code() != error_code_backup_unneeded)
throw;
}
}
TraceEvent("BW_DoBackupSubmitBackup", randomID)
.detail("Tag", printable(tag))
.detail("StopWhenDone", stopDifferentialDelay ? "False" : "True");
state std::string backupContainer = "file://simfdb/backups/";
// Keeps printing the agent status for as long as this actor is alive.
state Future<Void> status = statusLoop(cx, tag.toString());
try {
// The snapshot-interval arguments are randomized; StopWhenDone is set only when no
// differential phase was requested.
wait(backupAgent->submitBackup(cx,
StringRef(backupContainer),
{},
deterministicRandom()->randomInt(0, 60),
deterministicRandom()->randomInt(0, 2000),
tag.toString(),
backupRanges,
false,
StopWhenDone{ !stopDifferentialDelay },
self->usePartitionedLog,
IncrementalBackupOnly::False,
self->encryptionKeyFileName));
} catch (Error& e) {
TraceEvent("BW_DoBackupSubmitBackupException", randomID).error(e).detail("Tag", printable(tag));
if (e.code() != error_code_backup_unneeded && e.code() != error_code_backup_duplicate)
throw;
}
// Stop the differential backup, if enabled
if (stopDifferentialDelay) {
CODE_PROBE(!stopDifferentialFuture.isReady(),
"Restore starts at specified time - stopDifferential not ready");
wait(stopDifferentialFuture);
TraceEvent("BW_DoBackupWaitToDiscontinue", randomID)
.detail("Tag", printable(tag))
.detail("DifferentialAfter", stopDifferentialDelay);
try {
// Under BUGGIFY, first wait for the backup to become restorable and sanity-check its
// description before stopping it; otherwise just discontinue it.
if (BUGGIFY) {
state KeyBackedTag backupTag = makeBackupTag(tag.toString());
TraceEvent("BW_DoBackupWaitForRestorable", randomID).detail("Tag", backupTag.tagName);
// Wait until the backup is in a restorable state and get the status, URL, and UID atomically
state Reference<IBackupContainer> lastBackupContainer;
state UID lastBackupUID;
state EBackupState resultWait = wait(backupAgent->waitBackup(
cx, backupTag.tagName, StopWhenDone::False, &lastBackupContainer, &lastBackupUID));
TraceEvent("BW_DoBackupWaitForRestorable", randomID)
.detail("Tag", backupTag.tagName)
.detail("Result", BackupAgentBase::getStateText(resultWait));
state bool restorable = false;
if (lastBackupContainer) {
state Future<BackupDescription> fdesc = lastBackupContainer->describeBackup();
// ready() is used so that an error from describeBackup() does not throw here;
// a failed describe simply leaves `restorable` false.
wait(ready(fdesc));
if (!fdesc.isError()) {
state BackupDescription desc = fdesc.get();
wait(desc.resolveVersionTimes(cx));
printf("BackupDescription:\n%s\n", desc.toString().c_str());
restorable = desc.maxRestorableVersion.present();
}
}
TraceEvent("BW_LastBackupContainer", randomID)
.detail("BackupTag", printable(tag))
.detail("LastBackupContainer", lastBackupContainer ? lastBackupContainer->getURL() : "")
.detail("LastBackupUID", lastBackupUID)
.detail("WaitStatus", BackupAgentBase::getStateText(resultWait))
.detail("Restorable", restorable);
// Do not check the backup, if aborted
if (resultWait == EBackupState::STATE_ABORTED) {
}
// Ensure that a backup container was found
else if (!lastBackupContainer) {
TraceEvent(SevError, "BW_MissingBackupContainer", randomID)
.detail("LastBackupUID", lastBackupUID)
.detail("BackupTag", printable(tag))
.detail("WaitStatus", BackupAgentBase::getStateText(resultWait));
printf("BackupCorrectnessMissingBackupContainer tag: %s status: %s\n",
printable(tag).c_str(),
BackupAgentBase::getStateText(resultWait));
}
// Check that backup is restorable
else if (!restorable) {
TraceEvent(SevError, "BW_NotRestorable", randomID)
.detail("LastBackupUID", lastBackupUID)
.detail("BackupTag", printable(tag))
.detail("BackupFolder", lastBackupContainer->getURL())
.detail("WaitStatus", BackupAgentBase::getStateText(resultWait));
printf("BackupCorrectnessNotRestorable: tag: %s\n", printable(tag).c_str());
}
// Abort the backup, if not the first backup because the second backup may have aborted the backup
// by now
if (startDelay) {
TraceEvent("BW_DoBackupAbortBackup2", randomID)
.detail("Tag", printable(tag))
.detail("WaitStatus", BackupAgentBase::getStateText(resultWait))
.detail("LastBackupContainer", lastBackupContainer ? lastBackupContainer->getURL() : "")
.detail("Restorable", restorable);
wait(backupAgent->abortBackup(cx, tag.toString()));
} else {
TraceEvent("BW_DoBackupDiscontinueBackup", randomID)
.detail("Tag", printable(tag))
.detail("DifferentialAfter", stopDifferentialDelay);
wait(backupAgent->discontinueBackup(cx, tag));
}
}
else {
TraceEvent("BW_DoBackupDiscontinueBackup", randomID)
.detail("Tag", printable(tag))
.detail("DifferentialAfter", stopDifferentialDelay);
wait(backupAgent->discontinueBackup(cx, tag));
}
} catch (Error& e) {
TraceEvent("BW_DoBackupDiscontinueBackupException", randomID).error(e).detail("Tag", printable(tag));
if (e.code() != error_code_backup_unneeded && e.code() != error_code_backup_duplicate)
throw;
}
}
// Wait for the backup to complete
TraceEvent("BW_DoBackupWaitBackup", randomID).detail("Tag", printable(tag));
state EBackupState statusValue = wait(backupAgent->waitBackup(cx, tag.toString(), StopWhenDone::True));
std::string statusText = wait(backupAgent->getStatus(cx, ShowErrors::True, tag.toString()));
// Can we validate anything about status?
TraceEvent("BW_DoBackupComplete", randomID)
.detail("Tag", printable(tag))
.detail("Status", statusText)
.detail("StatusValue", BackupAgentBase::getStateText(statusValue));
return Void();
}
// Drives the backup phase of the test:
//   - optionally (allowPauses && BUGGIFY) starts a background actor that pauses/resumes the agent;
//   - creates the test encryption key file when encryption was selected;
//   - waits `backupAfter` seconds, runs a single doBackup() and waits for it;
//   - finally waits until `restoreAfter` seconds have passed since the schedule was started.
// A database_locked error from the backup ends the workload successfully; every other error is
// traced at SevError and rethrown.
ACTOR static Future<Void> _start(Database cx, BackupWorkload* self) {
    state FileBackupAgent backupAgent;

    TraceEvent("BW_Arguments")
        .detail("BackupTag", printable(self->backupTag))
        .detail("BackupAfter", self->backupAfter)
        .detail("RestoreAfter", self->restoreAfter)
        .detail("AbortAndRestartAfter", self->abortAndRestartAfter)
        .detail("DifferentialAfter", self->stopDifferentialAfter);

    state UID randomID = nondeterministicRandom()->randomUniqueID();

    if (self->allowPauses && BUGGIFY) {
        // Held in a state variable so the pause/resume loop lives as long as this actor.
        state Future<Void> cp = changePaused(cx, &backupAgent);
    }

    if (self->encryptionKeyFileName.present()) {
        wait(BackupContainerFileSystem::createTestEncryptionKeyFile(self->encryptionKeyFileName.get()));
    }

    try {
        // Started before the backup delay, so restoreAfter is measured from this point.
        state Future<Void> startRestore = delay(self->restoreAfter);

        // backup
        wait(delay(self->backupAfter));

        TraceEvent("BW_DoBackup1", randomID).detail("Tag", printable(self->backupTag));
        state Future<Void> b =
            doBackup(self, 0, &backupAgent, cx, self->backupTag, self->backupRanges, self->stopDifferentialAfter);

        TraceEvent("BW_DoBackupWait", randomID)
            .detail("BackupTag", printable(self->backupTag))
            .detail("AbortAndRestartAfter", self->abortAndRestartAfter);
        try {
            wait(b);
        } catch (Error& e) {
            // A locked database is treated as a normal way for the backup to end.
            if (e.code() != error_code_database_locked)
                throw;
            return Void();
        }
        TraceEvent("BW_DoBackupDone", randomID)
            .detail("BackupTag", printable(self->backupTag))
            .detail("AbortAndRestartAfter", self->abortAndRestartAfter);

        wait(startRestore);

        // We can't remove the backup agents here since the restore also needs them.
        // I.e., do not set g_simulator->backupAgents = ISimulator::BackupAgentType::NoBackupAgents
    } catch (Error& e) {
        TraceEvent(SevError, "BackupCorrectness").error(e).GetLastError();
        throw;
    }
    return Void();
}
};
WorkloadFactory<BackupWorkload> BackupWorkloadFactory;

View File

@ -501,8 +501,7 @@ struct BackupAndRestorePartitionedCorrectnessWorkload : TestWorkload {
InconsistentSnapshotOnly::False,
::invalidVersion,
self->encryptionKeyFileName,
{},
TransformPartitionedLog::True)));
{})));
printf("BackupCorrectness, backupAgent.restore finished for tag:%s\n", restoreTag.toString().c_str());
return Void();
}
@ -657,8 +656,7 @@ struct BackupAndRestorePartitionedCorrectnessWorkload : TestWorkload {
InconsistentSnapshotOnly::False,
::invalidVersion,
self->encryptionKeyFileName,
{},
TransformPartitionedLog::True));
{}));
wait(waitForAll(restores));

View File

@ -0,0 +1,364 @@
/*
* BackupCorrectness.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2025 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbclient/DatabaseConfiguration.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/ReadYourWrites.h"
#include "fdbrpc/simulator.h"
#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/BackupContainer.h"
#include "fdbclient/BackupContainerFileSystem.h"
#include "fdbclient/TenantManagement.actor.h"
#include "fdbserver/Knobs.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/workloads/BulkSetup.actor.h"
#include "flow/IRandom.h"
#include "flow/actorcompiler.h" // This must be the last #include.
// A workload which tests the correctness of the backup and restore process
struct RestoreWorkload : TestWorkload {
static constexpr auto NAME = "Restore";
Key backupTag, backupTag1, backupTag2;
bool performRestore, agentRequest;
Standalone<VectorRef<KeyRangeRef>> backupRanges, restoreRanges;
static int backupAgentRequests;
LockDB locked{ false };
bool allowPauses;
bool shareLogRange;
bool shouldSkipRestoreRanges;
Optional<std::string> encryptionKeyFileName;
UID randomID;
// Parses the workload options, randomly selects one of the two configured backup tags, and
// initializes both the backup ranges and the restore ranges to the normal key space.
//
// NOTE: the order of the random draws below determines the values chosen for a given seed and is
// therefore kept exactly as is.
RestoreWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
    locked.set(sharedRandomNumber % 2);

    performRestore = getOption(options, "performRestore"_sr, true);
    backupTag1 = getOption(options, "backupTag1"_sr, BackupAgentBase::getDefaultTag());
    backupTag2 = getOption(options, "backupTag2"_sr, BackupAgentBase::getDefaultTag());
    if (deterministicRandom()->coinflip()) {
        backupTag = backupTag1;
    } else {
        backupTag = backupTag2;
    }

    agentRequest = getOption(options, "simBackupAgents"_sr, true);
    allowPauses = getOption(options, "allowPauses"_sr, true);
    shareLogRange = getOption(options, "shareLogRange"_sr, false);

    // Read but not used by this workload.
    std::vector<std::string> ignoredRestorePrefixes =
        getOption(options, "restorePrefixesToInclude"_sr, std::vector<std::string>());

    shouldSkipRestoreRanges = deterministicRandom()->random01() < 0.3;

    // The default is drawn before the option lookup, exactly as when passed inline as an argument.
    const bool encryptByDefault = deterministicRandom()->random01() < 0.1;
    if (getOption(options, "encrypted"_sr, encryptByDefault)) {
        encryptionKeyFileName = "simfdb/" + getTestEncryptionFileName();
    }

    randomID = nondeterministicRandom()->randomUniqueID();
    TraceEvent("RW_ClientId").detail("Id", wcx.clientId);
    TraceEvent("RW_PerformRestore", randomID).detail("Value", performRestore);

    backupRanges.push_back_deep(backupRanges.arena(), normalKeys);
    restoreRanges = backupRanges; // may be modified later

    for (int idx = 0; idx < restoreRanges.size(); ++idx) {
        TraceEvent("RW_RestoreRange", randomID)
            .detail("RangeBegin", printable(restoreRanges[idx].begin))
            .detail("RangeEnd", printable(restoreRanges[idx].end));
    }
}
// No setup work is needed for this workload; completes immediately.
Future<Void> setup(Database const& cx) override { return Void(); }
// Entry point of the workload. Only the client with id 0 logs the chosen parameters and runs the
// restore actor; every other client finishes immediately.
Future<Void> start(Database const& cx) override {
    if (clientId == 0) {
        TraceEvent(SevInfo, "RW_Param")
            .detail("Locked", locked)
            .detail("PerformRestore", performRestore)
            .detail("BackupTag", printable(backupTag).c_str())
            .detail("AgentRequest", agentRequest)
            .detail("Encrypted", encryptionKeyFileName.present());
        return _start(cx, this);
    }
    return Void();
}
// This workload has no separate check phase; always reports success.
Future<bool> check(Database const& cx) override { return true; }
// No metrics are reported by this workload.
void getMetrics(std::vector<PerfMetric>& m) override {}
// Never-ending loop that alternately calls changePause(cx, true) and changePause(cx, false) on the
// given agent. After pausing it waits a random time of up to 10 seconds, after resuming a random
// time of up to 20 seconds. The loop has no exit; it only stops when the returned future is cancelled.
ACTOR static Future<Void> changePaused(Database cx, FileBackupAgent* backupAgent) {
loop {
wait(backupAgent->changePause(cx, true));
TraceEvent("RW_AgentPaused").log();
wait(delay(10 * deterministicRandom()->random01()));
wait(backupAgent->changePause(cx, false));
TraceEvent("RW_AgentResumed").log();
wait(delay(20 * deterministicRandom()->random01()));
}
}
// Never-ending loop that, every 10 seconds, traces whether the agent reports itself active and
// prints the textual and JSON status for `tag` to stdout. Uses its own FileBackupAgent instance.
// The loop has no exit; it only stops when the returned future is cancelled.
ACTOR static Future<Void> statusLoop(Database cx, std::string tag) {
state FileBackupAgent agent;
loop {
bool active = wait(agent.checkActive(cx));
TraceEvent("RW_AgentActivityCheck").detail("IsActive", active);
std::string status = wait(agent.getStatus(cx, ShowErrors::True, tag));
puts(status.c_str());
std::string statusJSON = wait(agent.getStatusJSON(cx, tag));
puts(statusJSON.c_str());
wait(delay(10.0));
}
}
// Restores from the backup container recorded for self->backupTag and then verifies that no
// backup-related keys are left behind.
//
// Steps:
//   1. Reads the database configuration, optionally starts the pause/resume actor
//      (allowPauses && BUGGIFY), starts the status printer and bumps backupAgentRequests.
//   2. Looks up the tag's UID (throws if the tag does not exist), its destUidValue and its
//      backup container.
//   3. If a container exists and performRestore is set: describes the backup, picks a target
//      version, clears the backup ranges, and runs a restore that is waited to completion.
//   4. Waits for the task count to drop to zero, then reports (SevError) any leftover
//      agent configuration keys, latest-version key, or log keys.
//   5. Decrements backupAgentRequests and, when it reaches zero while the simulator is in
//      BackupToFile mode, switches the simulator to NoBackupAgents.
// Any error is traced at SevError and rethrown.
// NOTE(review): on the error path backupAgentRequests is not decremented - confirm this is intended.
ACTOR static Future<Void> _start(Database cx, RestoreWorkload* self) {
state FileBackupAgent backupAgent;
state DatabaseConfiguration config = wait(getDatabaseConfiguration(cx));
TraceEvent("RW_Arguments")
.detail("BackupTag", printable(self->backupTag))
.detail("PerformRestore", self->performRestore);
if (self->allowPauses && BUGGIFY) {
// Held in a state variable so the pause/resume loop lives as long as this actor.
state Future<Void> cp = changePaused(cx, &backupAgent);
}
state Future<Void> status = statusLoop(cx, self->backupTag.toString());
// Increment the backup agent requests
if (self->agentRequest) {
RestoreWorkload::backupAgentRequests++;
}
if (self->encryptionKeyFileName.present()) {
wait(BackupContainerFileSystem::createTestEncryptionKeyFile(self->encryptionKeyFileName.get()));
}
try {
state KeyBackedTag keyBackedTag = makeBackupTag(self->backupTag.toString());
UidAndAbortedFlagT uidFlag = wait(keyBackedTag.getOrThrow(cx.getReference()));
state UID logUid = uidFlag.first;
state Key destUidValue = wait(BackupConfig(logUid).destUidValue().getD(cx.getReference()));
state Reference<IBackupContainer> lastBackupContainer =
wait(BackupConfig(logUid).backupContainer().getD(cx.getReference()));
if (lastBackupContainer && self->performRestore) {
auto container = IBackupContainer::openContainer(lastBackupContainer->getURL(),
lastBackupContainer->getProxy(),
lastBackupContainer->getEncryptionKeyFileName());
BackupDescription desc = wait(container->describeBackup());
// -1 is passed through to restore() unchanged when none of the random branches below fires.
state Version targetVersion = -1;
// Each branch draws a fresh random number, so the three probabilities are not independent
// shares of one draw.
// NOTE(review): minRestorableVersion / contiguousLogEnd are dereferenced after checking only
// maxRestorableVersion.present() - presumably they are always set together; confirm.
if (desc.maxRestorableVersion.present()) {
if (deterministicRandom()->random01() < 0.1) {
targetVersion = desc.minRestorableVersion.get();
} else if (deterministicRandom()->random01() < 0.1) {
targetVersion = desc.maxRestorableVersion.get();
} else if (deterministicRandom()->random01() < 0.5) {
targetVersion = deterministicRandom()->randomInt64(desc.minRestorableVersion.get(),
desc.contiguousLogEnd.get());
}
}
TraceEvent("RW_Restore", self->randomID)
.detail("LastBackupContainer", lastBackupContainer->getURL())
.detail("BackupTag", printable(self->backupTag))
.detail("Description", desc.toString());
// Wipe the backed-up key ranges so that the data present afterwards comes from the restore.
wait(runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Void> {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
for (auto& kvrange : self->backupRanges) {
// version needs to be decided before this transaction otherwise
// this clear mutation might be backup as well
tr->clear(kvrange);
}
return Void();
}));
TraceEvent("RW_Restore", self->randomID)
.detail("LastBackupContainer", lastBackupContainer->getURL())
.detail("BackupTag", printable(self->backupTag))
.detail("TargetVersion", targetVersion);
state int restoreIndex = 0;
// make sure system keys are not present in the restoreRanges as they will get restored first separately
// from the rest
Standalone<VectorRef<KeyRangeRef>> modifiedRestoreRanges;
for (int i = 0; i < self->restoreRanges.size(); ++i) {
// Ranges are only trimmed to the normal key space when tenant mode is REQUIRED and the
// range overlaps the system backup ranges; otherwise they are kept unchanged.
if (config.tenantMode != TenantMode::REQUIRED ||
!self->restoreRanges[i].intersects(getSystemBackupRanges())) {
modifiedRestoreRanges.push_back_deep(modifiedRestoreRanges.arena(), self->restoreRanges[i]);
} else {
KeyRangeRef normalKeyRange = self->restoreRanges[i] & normalKeys;
if (!normalKeyRange.empty()) {
modifiedRestoreRanges.push_back_deep(modifiedRestoreRanges.arena(), normalKeyRange);
}
}
}
self->restoreRanges = modifiedRestoreRanges;
Standalone<StringRef> restoreTag(self->backupTag.toString() + "_" + std::to_string(restoreIndex));
printf("BackupCorrectness, backupAgent.restore is called for restoreIndex:%d tag:%s\n",
restoreIndex,
restoreTag.toString().c_str());
TraceEvent("RW_RestoreRanges", self->randomID)
.detail("RestoreIndex", restoreIndex)
.detail("RestoreTag", printable(restoreTag))
.detail("RestoreRanges", self->restoreRanges.size());
state Future<Version> restore;
// Restores into the same database (cx is passed as both target and original), without any
// key prefix rewriting, and unlocks the database afterwards.
restore = backupAgent.restore(cx,
cx,
restoreTag,
KeyRef(lastBackupContainer->getURL()),
lastBackupContainer->getProxy(),
self->restoreRanges,
WaitForComplete::True,
targetVersion,
Verbose::True,
Key(),
Key(),
self->locked,
UnlockDB::True,
OnlyApplyMutationLogs::False,
InconsistentSnapshotOnly::False,
::invalidVersion,
self->encryptionKeyFileName,
{});
wait(success(restore));
ASSERT(!restore.isError());
}
// Key locations that are inspected for leftovers below.
state Key backupAgentKey = uidPrefixKey(logRangesRange.begin, logUid);
state Key backupLogValuesKey = destUidValue.withPrefix(backupLogKeys.begin);
state Key backupLatestVersionsPath = destUidValue.withPrefix(backupLatestVersionsPrefix);
state Key backupLatestVersionsKey = uidPrefixKey(backupLatestVersionsPath, logUid);
// Counts how many leftover-key categories were found; nonzero triggers a debug dump below.
state int displaySystemKeys = 0;
// Ensure that there is no left over key within the backup subspace
loop {
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
TraceEvent("RW_CheckLeftoverKeys", self->randomID).detail("BackupTag", printable(self->backupTag));
try {
// Check the left over tasks
// We have to wait for the list to empty since an abort and get status
// can leave extra tasks in the queue
TraceEvent("RW_CheckLeftoverTasks", self->randomID).detail("BackupTag", printable(self->backupTag));
state int64_t taskCount = wait(backupAgent.getTaskCount(tr));
state int waitCycles = 0;
while (taskCount > 0) {
waitCycles++;
TraceEvent("RW_NonzeroTaskWait", self->randomID)
.detail("BackupTag", printable(self->backupTag))
.detail("TaskCount", taskCount)
.detail("WaitCycles", waitCycles);
printf("%.6f %-10s Wait #%4d for %lld tasks to end\n",
now(),
self->randomID.toString().c_str(),
waitCycles,
(long long)taskCount);
wait(delay(5.0));
// A fresh transaction is used for every poll of the task count.
tr = makeReference<ReadYourWritesTransaction>(cx);
wait(store(taskCount, backupAgent.getTaskCount(tr)));
}
RangeResult agentValues =
wait(tr->getRange(KeyRange(KeyRangeRef(backupAgentKey, strinc(backupAgentKey))), 100));
// Error if the system keyspace for the backup tag is not empty
if (agentValues.size() > 0) {
displaySystemKeys++;
printf("BackupCorrectnessLeftOverMutationKeys: (%d) %s\n",
agentValues.size(),
printable(backupAgentKey).c_str());
TraceEvent(SevError, "BackupCorrectnessLeftOverMutationKeys", self->randomID)
.detail("BackupTag", printable(self->backupTag))
.detail("LeftOverKeys", agentValues.size())
.detail("KeySpace", printable(backupAgentKey));
for (auto& s : agentValues) {
TraceEvent("RW_LeftOverKey", self->randomID)
.detail("Key", printable(StringRef(s.key.toString())))
.detail("Value", printable(StringRef(s.value.toString())));
printf(" Key: %-50s Value: %s\n",
printable(StringRef(s.key.toString())).c_str(),
printable(StringRef(s.value.toString())).c_str());
}
} else {
printf("No left over backup agent configuration keys\n");
}
Optional<Value> latestVersion = wait(tr->get(backupLatestVersionsKey));
if (latestVersion.present()) {
TraceEvent(SevError, "BackupCorrectnessLeftOverVersionKey", self->randomID)
.detail("BackupTag", printable(self->backupTag))
.detail("BackupLatestVersionsKey", backupLatestVersionsKey.printable())
.detail("DestUidValue", destUidValue.printable());
} else {
printf("No left over backup version key\n");
}
RangeResult versions = wait(tr->getRange(
KeyRange(KeyRangeRef(backupLatestVersionsPath, strinc(backupLatestVersionsPath))), 1));
// Log keys are only checked when the log range is not shared, or when no latest-version
// entries remain under this destUidValue.
if (!self->shareLogRange || !versions.size()) {
RangeResult logValues = wait(
tr->getRange(KeyRange(KeyRangeRef(backupLogValuesKey, strinc(backupLogValuesKey))), 100));
// Error if the log/mutation keyspace for the backup tag is not empty
if (logValues.size() > 0) {
displaySystemKeys++;
printf("BackupCorrectnessLeftOverLogKeys: (%d) %s\n",
logValues.size(),
printable(backupLogValuesKey).c_str());
TraceEvent(SevError, "BackupCorrectnessLeftOverLogKeys", self->randomID)
.detail("BackupTag", printable(self->backupTag))
.detail("LeftOverKeys", logValues.size())
.detail("KeySpace", printable(backupLogValuesKey));
} else {
printf("No left over backup log keys\n");
}
}
break;
} catch (Error& e) {
TraceEvent("RW_CheckException", self->randomID).error(e);
wait(tr->onError(e));
}
}
if (displaySystemKeys) {
wait(TaskBucket::debugPrintRange(cx, normalKeys.end, StringRef()));
}
TraceEvent("RW_Complete", self->randomID).detail("BackupTag", printable(self->backupTag));
// Decrement the backup agent requests
if (self->agentRequest) {
RestoreWorkload::backupAgentRequests--;
}
// SOMEDAY: Remove after backup agents can exist quiescently
if ((g_simulator->backupAgents == ISimulator::BackupAgentType::BackupToFile) &&
(!RestoreWorkload::backupAgentRequests)) {
g_simulator->backupAgents = ISimulator::BackupAgentType::NoBackupAgents;
}
} catch (Error& e) {
// NOTE(review): the event name below still carries the name of another workload
// (BackupAndRestorePartitionedCorrectness); it looks copied - confirm before renaming.
TraceEvent(SevError, "BackupAndRestorePartitionedCorrectness").error(e).GetLastError();
throw;
}
return Void();
}
};
// Counter shared by all RestoreWorkload instances, starting at zero. A workload
// that made an agent request decrements it on completion; once it reaches zero
// while the simulator is in BackupToFile mode, the simulated backup agents are
// switched off (NoBackupAgents).
int RestoreWorkload::backupAgentRequests = 0;
// NOTE(review): presumably this static factory object is what makes the workload
// selectable from test specs (e.g. testName = 'Restore') — confirm against
// WorkloadFactory's definition.
WorkloadFactory<RestoreWorkload> RestoreWorkloadFactory;

View File

@ -483,7 +483,10 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES slow/ApiCorrectnessAtomicRestore.toml)
add_fdb_test(TEST_FILES slow/ApiCorrectnessSwitchover.toml)
add_fdb_test(TEST_FILES slow/ApiCorrectnessWithConsistencyCheck.toml)
add_fdb_test(TEST_FILES slow/BackupAndRestore.toml)
add_fdb_test(TEST_FILES slow/BackupCorrectnessPartitioned.toml)
add_fdb_test(TEST_FILES slow/BackupNewAndOldRestore.toml)
add_fdb_test(TEST_FILES slow/BackupOldAndNewRestore.toml)
add_fdb_test(TEST_FILES slow/ClogWithRollbacks.toml)
add_fdb_test(TEST_FILES slow/CloggedCycleTest.toml)
add_fdb_test(TEST_FILES slow/CloggedStorefront.toml)

View File

@ -0,0 +1,43 @@
testClass = "Backup"
[configuration]
tenantModes = ['disabled'] # Do not support tenant
encryptModes = ['disabled'] # Do not support encryption
[[test]]
testTitle = 'BackupPartitioned'
clearAfterTest = false
simBackupAgents = 'BackupToFile'
[[test.workload]]
testName = 'Cycle'
nodeCount = 3000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
[[test.workload]]
testName = 'Backup'
usePartitionedLog = true
backupAfter = 10.0
restoreAfter = 60.0
[[test]]
testTitle = 'RestorePartitioned'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
[[test.workload]]
testName = 'Restore'
# check consistency after restore
[[test]]
testTitle = 'CycleAfterRestore'
[[test.workload]]
testName = 'Cycle'
nodeCount = 3000
transactionsPerSecond = 2500.0
testDuration = 10.0
skipSetup = true
expectedRate = 0

View File

@ -1,7 +1,6 @@
testClass = "Backup"
[configuration]
buggify = false
tenantModes = ['disabled'] # Do not support tenant
encryptModes = ['disabled'] # Do not support encryption

View File

@ -0,0 +1,70 @@
testClass = "Backup"
[configuration]
tenantModes = ['disabled'] # Do not support tenant
encryptModes = ['disabled'] # Do not support encryption
[[test]]
testTitle = 'NewBackup'
clearAfterTest = false
simBackupAgents = 'BackupToFile'
[[test.workload]]
testName = 'Cycle'
nodeCount = 3000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
[[test.workload]]
testName = 'Backup'
usePartitionedLog = true
encrypted = false
backupTag = 'newBackup'
backupAfter = 10.0
restoreAfter = 60.0
[[test]]
testTitle = 'OldBackup'
clearAfterTest = false
simBackupAgents = 'BackupToFile'
[[test.workload]]
testName = 'Cycle'
nodeCount = 3000
transactionsPerSecond = 2500.0
testDuration = 30.0
skipSetup = true
expectedRate = 0
[[test.workload]]
testName = 'Backup'
usePartitionedLog = false
encrypted = false
backupTag = 'oldBackup'
backupAfter = 10.0
restoreAfter = 60.0
# Randomly pick one of the backup tags and restore it.
[[test]]
testTitle = 'RestoreRandomBackup'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
[[test.workload]]
testName = 'Restore'
backupTag1 = 'newBackup'
backupTag2 = 'oldBackup'
encrypted = false
# check consistency after restore
[[test]]
testTitle = 'CycleAfterRestore'
[[test.workload]]
testName = 'Cycle'
nodeCount = 3000
transactionsPerSecond = 2500.0
testDuration = 10.0
skipSetup = true
expectedRate = 0

View File

@ -0,0 +1,71 @@
testClass = "Backup"
[configuration]
tenantModes = ['disabled'] # Do not support tenant
encryptModes = ['disabled'] # Do not support encryption
[[test]]
testTitle = 'OldBackup'
clearAfterTest = false
simBackupAgents = 'BackupToFile'
[[test.workload]]
testName = 'Cycle'
nodeCount = 3000
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0
# TODO: pass encrypted file across backup and restore workload
[[test.workload]]
testName = 'Backup'
usePartitionedLog = false
encrypted = false
backupTag = 'oldBackup'
backupAfter = 10.0
restoreAfter = 60.0
[[test]]
testTitle = 'NewBackup'
clearAfterTest = false
simBackupAgents = 'BackupToFile'
[[test.workload]]
testName = 'Cycle'
nodeCount = 3000
transactionsPerSecond = 2500.0
testDuration = 30.0
skipSetup = true
expectedRate = 0
[[test.workload]]
testName = 'Backup'
usePartitionedLog = true
encrypted = false
backupTag = 'newBackup'
backupAfter = 10.0
restoreAfter = 60.0
# Randomly pick one of the backup tags and restore it.
[[test]]
testTitle = 'RestoreRandomBackup'
simBackupAgents = 'BackupToFile'
clearAfterTest = false
[[test.workload]]
testName = 'Restore'
backupTag1 = 'oldBackup'
backupTag2 = 'newBackup'
encrypted = false
# check consistency after restore
[[test]]
testTitle = 'CycleAfterRestore'
[[test.workload]]
testName = 'Cycle'
nodeCount = 3000
transactionsPerSecond = 2500.0
testDuration = 10.0
skipSetup = true
expectedRate = 0