Stop backup workers before clearing DB in parallel restore workload
This is necessary because the mutations that clear the DB can otherwise be picked up by the backup workers and applied during the restore, causing the restore to fail.
This commit is contained in:
parent
7e5551ea19
commit
4e128328f7
|
@ -179,7 +179,9 @@ struct BackupData {
|
|||
config.startedBackupWorkers().set(tr, workers.get());
|
||||
}
|
||||
for (auto p : workers.get()) {
|
||||
TraceEvent("BackupWorkerDebug", self->myId).detail("Epoch", p.first).detail("TagID", p.second);
|
||||
TraceEvent("BackupWorkerDebugTag", self->myId)
|
||||
.detail("Epoch", p.first)
|
||||
.detail("TagID", p.second);
|
||||
}
|
||||
wait(tr->commit());
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbclient/BackupAgent.actor.h"
|
||||
#include "fdbclient/BackupContainer.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include "fdbserver/workloads/BulkSetup.actor.h"
|
||||
#include "fdbclient/RestoreWorkerInterface.actor.h"
|
||||
|
@ -421,6 +422,11 @@ struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
|
|||
// wait(attemptDirtyRestore(self, cx, &backupAgent, StringRef(lastBackupContainer->getURL()),
|
||||
// randomID));
|
||||
}
|
||||
|
||||
// We must ensure no backup workers are running, otherwise the clear DB
|
||||
// below can be picked up by backup workers and applied during restore.
|
||||
wait(success(changeConfig(cx, "backup_worker_enabled:=0", true)));
|
||||
|
||||
// Clear DB before restore
|
||||
wait(runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Void> {
|
||||
for (auto& kvrange : self->backupRanges) tr->clear(kvrange);
|
||||
|
@ -437,12 +443,6 @@ struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
|
|||
BackupDescription desc = wait(container->describeBackup());
|
||||
ASSERT(self->usePartitionedLogs == desc.partitioned);
|
||||
|
||||
TraceEvent("BAFRW_Restore", randomID)
|
||||
.detail("LastBackupContainer", lastBackupContainer->getURL())
|
||||
.detail("MinRestorableVersion", desc.minRestorableVersion.get())
|
||||
.detail("MaxRestorableVersion", desc.maxRestorableVersion.get())
|
||||
.detail("ContiguousLogEnd", desc.contiguousLogEnd.get());
|
||||
|
||||
state Version targetVersion = -1;
|
||||
if (desc.maxRestorableVersion.present()) {
|
||||
if (deterministicRandom()->random01() < 0.1) {
|
||||
|
@ -461,6 +461,13 @@ struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
|
|||
}
|
||||
}
|
||||
|
||||
TraceEvent("BAFRW_Restore", randomID)
|
||||
.detail("LastBackupContainer", lastBackupContainer->getURL())
|
||||
.detail("MinRestorableVersion", desc.minRestorableVersion.get())
|
||||
.detail("MaxRestorableVersion", desc.maxRestorableVersion.get())
|
||||
.detail("ContiguousLogEnd", desc.contiguousLogEnd.get())
|
||||
.detail("TargetVersion", targetVersion);
|
||||
|
||||
state std::vector<Future<Version>> restores;
|
||||
state std::vector<Standalone<StringRef>> restoreTags;
|
||||
|
||||
|
|
Loading…
Reference in New Issue