From b4ffc04e4e0ed767c2a6d48f58b770bbdba20da5 Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Sun, 4 Oct 2020 18:11:04 -0700 Subject: [PATCH] Add UpgradeAndBackupRestore test --- fdbserver/CMakeLists.txt | 2 + fdbserver/SimulatedCluster.actor.cpp | 5 +- fdbserver/workloads/Cycle.actor.cpp | 5 + fdbserver/workloads/RestoreBackup.actor.cpp | 117 ++++++++++++++++++ fdbserver/workloads/SubmitBackup.actor.cpp | 74 +++++++++++ tests/CMakeLists.txt | 3 + .../from_7.0.0/UpgradeAndBackupRestore-1.toml | 53 ++++++++ .../from_7.0.0/UpgradeAndBackupRestore-2.toml | 65 ++++++++++ 8 files changed, 322 insertions(+), 2 deletions(-) create mode 100644 fdbserver/workloads/RestoreBackup.actor.cpp create mode 100644 fdbserver/workloads/SubmitBackup.actor.cpp create mode 100644 tests/restarting/from_7.0.0/UpgradeAndBackupRestore-1.toml create mode 100644 tests/restarting/from_7.0.0/UpgradeAndBackupRestore-2.toml diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index 823150dfe9..03451fb0e0 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -181,6 +181,7 @@ set(FDBSERVER_SRCS workloads/ReadWrite.actor.cpp workloads/RemoveServersSafely.actor.cpp workloads/ReportConflictingKeys.actor.cpp + workloads/RestoreBackup.actor.cpp workloads/Rollback.actor.cpp workloads/RyowCorrectness.actor.cpp workloads/RYWDisable.actor.cpp @@ -195,6 +196,7 @@ set(FDBSERVER_SRCS workloads/StatusWorkload.actor.cpp workloads/Storefront.actor.cpp workloads/StreamingRead.actor.cpp + workloads/SubmitBackup.actor.cpp workloads/TagThrottleApi.actor.cpp workloads/TargetedKill.actor.cpp workloads/TaskBucketCorrectness.actor.cpp diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 0dd5a30cf5..db9ff664fe 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -642,8 +642,9 @@ ACTOR Future restartSimulatedSystem(vector>* systemActors, st // SOMEDAY: parse backup agent from test file systemActors->push_back(reportErrors( simulatedMachine(conn, ipAddrs, usingSSL, localities, processClass, baseFolder, true, - i == useSeedForMachine, enableExtraDB ? AgentAddition : AgentNone, - usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass), whitelistBinPaths), + i == useSeedForMachine, AgentAddition, + usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass), + whitelistBinPaths), processClass == ProcessClass::TesterClass ? "SimulatedTesterMachine" : "SimulatedMachine")); } diff --git a/fdbserver/workloads/Cycle.actor.cpp b/fdbserver/workloads/Cycle.actor.cpp index b3098865d4..57ed30e8fe 100644 --- a/fdbserver/workloads/Cycle.actor.cpp +++ b/fdbserver/workloads/Cycle.actor.cpp @@ -38,6 +38,7 @@ struct CycleWorkload : TestWorkload { vector> clients; PerfIntCounter transactions, retries, tooOldRetries, commitFailedRetries; PerfDoubleCounter totalLatency; + bool verifyOnly; CycleWorkload(WorkloadContext const& wcx) : TestWorkload(wcx), @@ -51,6 +52,7 @@ struct CycleWorkload : TestWorkload { keyPrefix = unprintable( getOption(options, "keyPrefix"_sr, LiteralStringRef("")).toString() ); traceParentProbability = getOption(options, "traceParentProbability "_sr, 0.01); minExpectedTransactionsPerSecond = transactionsPerSecond * getOption(options, "expectedRate"_sr, 0.7); + verifyOnly = getOption(options, "verifyOnly"_sr, false); } virtual std::string description() { return "CycleWorkload"; } @@ -58,6 +60,9 @@ struct CycleWorkload : TestWorkload { return bulkSetup( cx, this, nodeCount, Promise() ); } virtual Future start( Database const& cx ) { + if (verifyOnly) { + return Void(); + } for(int c=0; c backupContainer; + + Standalone backupDir; + Standalone tag; + double delayFor; + bool stopWhenDone; + + RestoreBackupWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { + backupDir = getOption(options, LiteralStringRef("backupDir"), LiteralStringRef("file://simfdb/backups/")); + tag = getOption(options, LiteralStringRef("tag"), LiteralStringRef("default")); + delayFor = getOption(options, LiteralStringRef("delayFor"), 10.0); + stopWhenDone = getOption(options, LiteralStringRef("stopWhenDone"), false); + } + + static constexpr const char* DESCRIPTION = "RestoreBackup"; + + ACTOR static Future waitOnBackup(RestoreBackupWorkload* self, Database cx) { + state Version waitForVersion; + state UID backupUID; + state Transaction tr(cx); + loop { + try { + Version v = wait(tr.getReadVersion()); + waitForVersion = v; + break; + } catch (Error& e) { + wait(tr.onError(e)); + } + } + EBackupState backupState = wait(self->backupAgent.waitBackup(cx, self->tag.toString(), self->stopWhenDone, + &self->backupContainer, &backupUID)); + if (backupState == EBackupState::STATE_COMPLETED) { + return Void(); + } else if (backupState == EBackupState::STATE_RUNNING_DIFFERENTIAL) { + ASSERT(!self->stopWhenDone); + loop { + BackupDescription desc = wait(self->backupContainer->describeBackup(true)); + TraceEvent("BackupVersionGate") + .detail("MaxLogEndVersion", desc.maxLogEnd.present() ? desc.maxLogEnd.get() : invalidVersion) + .detail("ContiguousLogEndVersion", + desc.contiguousLogEnd.present() ? desc.contiguousLogEnd.get() : invalidVersion) + .detail("TargetVersion", waitForVersion); + if (desc.contiguousLogEnd.present() && desc.contiguousLogEnd.get() >= waitForVersion) { + wait(self->backupAgent.abortBackup(cx, self->tag.toString())); + return Void(); + } + wait(delay(5.0)); + } + } else { + TraceEvent(SevError, "BadBackupState").detail("BackupState", BackupAgentBase::getStateText(backupState)); + ASSERT(false); + return Void(); + } + } + + ACTOR static Future clearDatabase(Database cx) { + // TODO: Batch to avoid large clear ranges? + state Transaction tr(cx); + loop { + try { + tr.clear(normalKeys); + wait(tr.commit()); + return Void(); + } catch (Error& e) { + wait(tr.onError(e)); + } + } + } + + ACTOR static Future _start(RestoreBackupWorkload* self, Database cx) { + wait(delay(self->delayFor)); + wait(waitOnBackup(self, cx)); + wait(clearDatabase(cx)); + wait(success(self->backupAgent.restore(cx, cx, self->tag, Key(self->backupContainer->getURL()), true, + ::invalidVersion, true))); + return Void(); + } + + std::string description() override { return DESCRIPTION; } + Future setup(Database const& cx) override { return Void(); } + Future start(Database const& cx) override { return clientId ? Void() : _start(this, cx); } + Future check(Database const& cx) override { return true; } + void getMetrics(vector& m) {} +}; + +WorkloadFactory RestoreBackupWorkloadFactory(RestoreBackupWorkload::DESCRIPTION); diff --git a/fdbserver/workloads/SubmitBackup.actor.cpp b/fdbserver/workloads/SubmitBackup.actor.cpp new file mode 100644 index 0000000000..3cec3b0953 --- /dev/null +++ b/fdbserver/workloads/SubmitBackup.actor.cpp @@ -0,0 +1,74 @@ +/* + * SubmitBackup.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fdbclient/FDBTypes.h" +#include "fdbclient/ReadYourWrites.h" +#include "fdbrpc/simulator.h" +#include "fdbclient/BackupAgent.actor.h" +#include "fdbclient/BackupContainer.h" +#include "fdbserver/workloads/workloads.actor.h" +#include "flow/actorcompiler.h" // This must be the last #include. + +struct SubmitBackupWorkload : TestWorkload { + + FileBackupAgent backupAgent; + + Standalone backupDir; + Standalone tag; + double delayFor; + int snapshotInterval; + bool stopWhenDone; + bool incremental; + + SubmitBackupWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { + backupDir = getOption(options, LiteralStringRef("backupDir"), LiteralStringRef("file://simfdb/backups/")); + tag = getOption(options, LiteralStringRef("tag"), LiteralStringRef("default")); + delayFor = getOption(options, LiteralStringRef("delayFor"), 10.0); + snapshotInterval = getOption(options, LiteralStringRef("snapshotInterval"), 1e8); + stopWhenDone = getOption(options, LiteralStringRef("stopWhenDone"), true); + incremental = getOption(options, LiteralStringRef("incremental"), false); + } + + static constexpr const char* DESCRIPTION = "SubmitBackup"; + + ACTOR static Future _start(SubmitBackupWorkload* self, Database cx) { + wait(delay(self->delayFor)); + Standalone> backupRanges; + backupRanges.push_back_deep(backupRanges.arena(), normalKeys); + try { + wait(self->backupAgent.submitBackup(cx, self->backupDir, self->snapshotInterval, self->tag.toString(), + backupRanges, self->stopWhenDone, false, self->incremental)); + } catch (Error& e) { + TraceEvent("BackupSubmitError").error(e); + if (e.code() != error_code_backup_duplicate) { + throw; + } + } + return Void(); + } + + std::string description() override { return DESCRIPTION; } + Future setup(Database const& cx) override { return Void(); } + Future start(Database const& cx) override { return clientId ? Void() : _start(this, cx); } + Future check(Database const& cx) override { return true; } + void getMetrics(vector& m) {} +}; + +WorkloadFactory SubmitBackupWorkloadFactory(SubmitBackupWorkload::DESCRIPTION); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 54e77bc470..6f4b19d205 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -194,6 +194,9 @@ if(WITH_PYTHON) add_fdb_test( TEST_FILES restarting/from_5.2.0/ClientTransactionProfilingCorrectness-1.txt restarting/from_5.2.0/ClientTransactionProfilingCorrectness-2.txt) + add_fdb_test( + TEST_FILES restarting/from_7.0.0/UpgradeAndBackupRestore-1.toml + restarting/from_7.0.0/UpgradeAndBackupRestore-2.toml) add_fdb_test( TEST_FILES restarting/to_6.3.5/CycleTestRestart-1.txt restarting/to_6.3.5/CycleTestRestart-2.txt) diff --git a/tests/restarting/from_7.0.0/UpgradeAndBackupRestore-1.toml b/tests/restarting/from_7.0.0/UpgradeAndBackupRestore-1.toml new file mode 100644 index 0000000000..4147451062 --- /dev/null +++ b/tests/restarting/from_7.0.0/UpgradeAndBackupRestore-1.toml @@ -0,0 +1,53 @@ +[[test]] +testTitle = 'SubmitBackup' +simBackupAgents = 'BackupToFile' +clearAfterTest = false + + [[test.workload]] + testName = 'SubmitBackup' + delayFor = 0 + stopWhenDone = false + +[[test]] +testTitle = 'FirstCycleTest' +clearAfterTest = false + + [[test.workload]] + testName = 'Cycle' + nodeCount = 30000 + transactionsPerSecond = 2500.0 + testDuration = 30.0 + expectedRate = 0 + keyPrefix = 'BeforeRestart' + + [[test.workload]] + testName = 'RandomClogging' + testDuration = 90.0 + + [[test.workload]] + testName = 'Rollback' + meanDelay = 90.0 + testDuration = 90.0 + + [[test.workload]] + testName = 'Attrition' + machinesToKill = 10 + machinesToLeave = 3 + reboot = true + testDuration = 90.0 + + [[test.workload]] + testName = 'Attrition' + machinesToKill = 10 + machinesToLeave = 3 + reboot = true + testDuration = 90.0 + +[[test]] +testTitle='SaveDatabase' +clearAfterTest = false + + [[test.workload]] + testName='SaveAndKill' + restartInfoLocation='simfdb/restartInfo.ini' + testDuration=30.0 diff --git a/tests/restarting/from_7.0.0/UpgradeAndBackupRestore-2.toml b/tests/restarting/from_7.0.0/UpgradeAndBackupRestore-2.toml new file mode 100644 index 0000000000..b7f60452bd --- /dev/null +++ b/tests/restarting/from_7.0.0/UpgradeAndBackupRestore-2.toml @@ -0,0 +1,65 @@ +[[test]] +testTitle = 'SecondCycleTest' +simBackupAgents = 'BackupToFile' + + [[test.workload]] + testName = 'Cycle' + nodeCount = 30000 + transactionsPerSecond = 2500.0 + testDuration = 30.0 + expectedRate = 0 + keyPrefix = 'AfterRestart' + + [[test.workload]] + testName = 'Cycle' + nodeCount = 30000 + transactionsPerSecond = 2500.0 + testDuration = 30.0 + expectedRate = 0 + keyPrefix = 'BeforeRestart' + verifyOnly = true + + [[test.workload]] + testName = 'RandomClogging' + testDuration = 90.0 + + [[test.workload]] + testName = 'Rollback' + meanDelay = 90.0 + testDuration = 90.0 + + [[test.workload]] + testName = 'Attrition' + machinesToKill = 10 + machinesToLeave = 3 + reboot = true + testDuration = 90.0 + + [[test.workload]] + testName = 'Attrition' + machinesToKill = 10 + machinesToLeave = 3 + reboot = true + testDuration = 90.0 + +[[test]] +testTitle = 'RestoreBackup' +simBackupAgents = 'BackupToFile' + + [[test.workload]] + testName = 'RestoreBackup' + tag = 'default' + + [[test.workload]] + testName = 'Cycle' + nodeCount = 30000 + keyPrefix = 'AfterRestart' + expectedRate = 0 + verifyOnly = true + + [[test.workload]] + testName = 'Cycle' + nodeCount = 30000 + keyPrefix = 'BeforeRestart' + expectedRate = 0 + verifyOnly = true