From 7780c24cbe4e3ae166daf9294c89585f5898fdea Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Mon, 21 Sep 2020 14:45:29 -0400 Subject: [PATCH 1/8] Attempt to use version instead of test duration to have backup capture all changes --- fdbserver/workloads/IncrementalBackup.actor.cpp | 14 ++++++++++++++ tests/fast/IncrementalBackup.toml | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/fdbserver/workloads/IncrementalBackup.actor.cpp b/fdbserver/workloads/IncrementalBackup.actor.cpp index 2537a84c90..1f2cfd866e 100644 --- a/fdbserver/workloads/IncrementalBackup.actor.cpp +++ b/fdbserver/workloads/IncrementalBackup.actor.cpp @@ -19,6 +19,7 @@ */ #include "fdbclient/FDBTypes.h" +#include "fdbclient/ReadYourWrites.h" #include "fdbrpc/simulator.h" #include "fdbclient/BackupAgent.actor.h" #include "fdbclient/BackupContainer.h" @@ -32,12 +33,14 @@ struct IncrementalBackupWorkload : TestWorkload { FileBackupAgent backupAgent; bool submitOnly; bool restoreOnly; + bool waitVersion; IncrementalBackupWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { backupDir = getOption(options, LiteralStringRef("backupDir"), LiteralStringRef("file://simfdb/backups/")); tag = getOption(options, LiteralStringRef("tag"), LiteralStringRef("default")); submitOnly = getOption(options, LiteralStringRef("submitOnly"), false); restoreOnly = getOption(options, LiteralStringRef("restoreOnly"), false); + waitVersion = getOption(options, LiteralStringRef("waitVersion"), false); } virtual std::string description() { return "IncrementalBackup"; } @@ -76,6 +79,17 @@ struct IncrementalBackupWorkload : TestWorkload { TraceEvent("IBackupRestoreAttempt"); wait(success(self->backupAgent.waitBackup(cx, self->tag.toString(), false, &backupContainer, &backupUID))); // TODO: add testing scenario for atomics and beginVersion + if (self->waitVersion) { + state Reference tr(new ReadYourWritesTransaction(cx)); + state Version v = wait(tr->getReadVersion()); + loop { + BackupDescription desc = wait(backupContainer->describeBackup()); + if (desc.maxLogEnd.get() >= v) break; + // Avoid spamming requests with a delay + wait(delay(3.0)); + } + + } wait(success(self->backupAgent.restore(cx, cx, Key(self->tag.toString()), Key(backupContainer->getURL()), true, -1, true, normalKeys, Key(), Key(), true, true))); TraceEvent("IBackupRestoreSuccess"); diff --git a/tests/fast/IncrementalBackup.toml b/tests/fast/IncrementalBackup.toml index ab965a3886..90c5898d7b 100644 --- a/tests/fast/IncrementalBackup.toml +++ b/tests/fast/IncrementalBackup.toml @@ -16,7 +16,7 @@ simBackupAgents = 'BackupToFile' testName = 'Cycle' nodeCount = 3000 transactionsPerSecond = 3000.0 - testDuration = 120.0 + testDuration = 10.0 expectedRate = 0 [[test]] @@ -28,6 +28,7 @@ simBackupAgents = 'BackupToFile' testName = 'IncrementalBackup' tag = 'default' restoreOnly = true + waitVersion = true [[test]] From 3c505786caa1bb0fb3cf8d276e9a6952cda24cb4 Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Mon, 21 Sep 2020 20:19:15 -0700 Subject: [PATCH 2/8] Refactor fdbserver.actor.cpp --- fdbserver/fdbserver.actor.cpp | 265 ++++++++++++++++++++-------------- 1 file changed, 154 insertions(+), 111 deletions(-) diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 514f7e5122..d770a86746 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -20,46 +20,50 @@ // There's something in one of the files below that defines a macros // a macro that makes boost interprocess break on Windows. -#include "flow/Tracing.h" -#include -#include #define BOOST_DATE_TIME_NO_LIB -#include -#include -#include "fdbrpc/simulator.h" -#include "flow/DeterministicRandom.h" -#include "fdbrpc/PerfMetric.h" -#include "flow/Platform.h" -#include "flow/SystemMonitor.h" -#include "fdbclient/NativeAPI.actor.h" -#include "fdbclient/SystemData.h" -#include "fdbserver/CoordinationInterface.h" -#include "fdbserver/WorkerInterface.actor.h" -#include "fdbclient/RestoreWorkerInterface.actor.h" -#include "fdbserver/ServerDBInfo.h" -#include "fdbserver/MoveKeys.actor.h" -#include "fdbserver/ConflictSet.h" -#include "fdbserver/DataDistribution.actor.h" -#include "fdbserver/NetworkTest.h" -#include "fdbserver/IKeyValueStore.h" #include +#include +#include +#include +#include + #include #include -#include -#include "fdbserver/pubsub.h" -#include "fdbserver/SimulatedCluster.h" -#include "fdbserver/TesterInterface.actor.h" -#include "fdbserver/workloads/workloads.actor.h" #include -#include "fdbserver/Status.h" -#include "fdbrpc/Net2FileSystem.h" -#include "fdbrpc/AsyncFileCached.actor.h" -#include "fdbserver/CoroFlow.h" -#include "flow/TLSConfig.actor.h" -#include "fdbclient/versions.h" +#include +#include + +#include "fdbclient/NativeAPI.actor.h" +#include "fdbclient/RestoreWorkerInterface.actor.h" +#include "fdbclient/SystemData.h" +#include "fdbclient/versions.h" #include "fdbmonitor/SimpleIni.h" +#include "fdbrpc/AsyncFileCached.actor.h" +#include "fdbrpc/Net2FileSystem.h" +#include "fdbrpc/PerfMetric.h" +#include "fdbrpc/simulator.h" +#include "fdbserver/ConflictSet.h" +#include "fdbserver/CoordinationInterface.h" +#include "fdbserver/CoroFlow.h" +#include "fdbserver/DataDistribution.actor.h" +#include "fdbserver/IKeyValueStore.h" +#include "fdbserver/MoveKeys.actor.h" +#include "fdbserver/NetworkTest.h" +#include "fdbserver/ServerDBInfo.h" +#include "fdbserver/SimulatedCluster.h" +#include "fdbserver/Status.h" +#include "fdbserver/TesterInterface.actor.h" +#include "fdbserver/WorkerInterface.actor.h" +#include "fdbserver/pubsub.h" +#include "fdbserver/workloads/workloads.actor.h" +#include "flow/DeterministicRandom.h" +#include "flow/Platform.h" +#include "flow/SimpleOpt.h" +#include "flow/SystemMonitor.h" +#include "flow/TLSConfig.actor.h" +#include "flow/Tracing.h" #if defined(__linux__) || defined(__FreeBSD__) #include @@ -75,8 +79,7 @@ #include #endif -#include "flow/SimpleOpt.h" -#include + #include "flow/actorcompiler.h" // This must be the last #include. // clang-format off @@ -471,152 +474,192 @@ static void printHelpTeaser( const char *name ) { fprintf(stderr, "Try `%s --help' for more information.\n", name); } +static void printOptionUsage(std::string option, std::string description) { + static const std::string OPTION_INDENT(" "); + static const std::string DESCRIPTION_INDENT(" "); + static const int WIDTH = 80; + + boost::algorithm::trim(option); + boost::algorithm::trim(description); + + std::string result = OPTION_INDENT + option + "\n"; + + std::stringstream sstream(description); + if (sstream.eof()) { + printf(result.c_str()); + return; + } + + std::string currWord; + sstream >> currWord; + + std::string currLine(DESCRIPTION_INDENT + ' ' + currWord); + int currLength = DESCRIPTION_INDENT.size(); + + while(!sstream.eof()) { + sstream >> currWord; + + if (currLength + static_cast(currWord.size()) + 1 > WIDTH) { + result += currLine + '\n'; + currLine = DESCRIPTION_INDENT + ' ' + currWord; + } else { + currLine += ' ' + currWord; + } + currLength = currLine.size(); + } + result += currLine + '\n'; + + printf(result.c_str()); +} + static void printUsage( const char *name, bool devhelp ) { printf("FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n"); printf("Usage: %s -p ADDRESS [OPTIONS]\n\n", name); - printf(" -p ADDRESS, --public_address ADDRESS\n" + printOptionUsage(" -p ADDRESS, --public_address ADDRESS", " Public address, specified as `IP_ADDRESS:PORT' or `auto:PORT'.\n"); - printf(" -l ADDRESS, --listen_address ADDRESS\n" - " Listen address, specified as `IP_ADDRESS:PORT' (defaults to\n"); - printf(" public address).\n"); - printf(" -C CONNFILE, --cluster_file CONNFILE\n" + printOptionUsage(" -l ADDRESS, --listen_address ADDRESS\n", + " Listen address, specified as `IP_ADDRESS:PORT' (defaults to\n" + " public address).\n"); + printOptionUsage(" -C CONNFILE, --cluster_file CONNFILE\n", " The path of a file containing the connection string for the\n" " FoundationDB cluster. The default is first the value of the\n" " FDB_CLUSTER_FILE environment variable, then `./fdb.cluster',\n" - " then `%s'.\n", platform::getDefaultClusterFilePath().c_str()); - printf(" --seed_cluster_file SEEDCONNFILE\n" + " then `" + platform::getDefaultClusterFilePath() + "'."); + printOptionUsage(" --seed_cluster_file SEEDCONNFILE\n", " The path of a seed cluster file which will be used to connect\n" " if the -C cluster file does not exist. If the server connects\n" " successfully using the seed file, then it copies the file to\n" " the -C file location.\n"); - printf(" --seed_connection_string SEEDCONNSTRING\n" + printOptionUsage(" --seed_connection_string SEEDCONNSTRING\n", " The path of a seed connection string which will be used to connect\n" " if the -C cluster file does not exist. If the server connects\n" " successfully using the seed string, then it copies the string to\n" " the -C file location.\n"); #ifdef __linux__ - printf(" --data_filesystem PATH\n" + printOptionUsage(" --data_filesystem PATH\n", " Turns on validation that all data files are written to a drive\n" " mounted at the specified PATH. This checks that the device at PATH\n" " is currently mounted and that any data files get written to the\n" " same device.\n"); #endif - printf(" -d PATH, --datadir PATH\n" - " Store data files in the given folder (must be unique for each\n"); - printf(" fdbserver instance on a given machine).\n"); - printf(" -L PATH, --logdir PATH\n" + printOptionUsage(" -d PATH, --datadir PATH\n", + " Store data files in the given folder (must be unique for each\n" + " fdbserver instance on a given machine).\n"); + printOptionUsage(" -L PATH, --logdir PATH\n", " Store log files in the given folder (default is `.').\n"); - printf(" --logsize SIZE Roll over to a new log file after the current log file\n" + printOptionUsage(" --logsize SIZE", + "Roll over to a new log file after the current log file\n" " exceeds SIZE bytes. The default value is 10MiB.\n"); - printf(" --maxlogs SIZE, --maxlogssize SIZE\n" + printOptionUsage(" --maxlogs SIZE, --maxlogssize SIZE\n", " Delete the oldest log file when the total size of all log\n" " files exceeds SIZE bytes. If set to 0, old log files will not\n" " be deleted. The default value is 100MiB.\n"); - printf(" --loggroup LOG_GROUP\n" + printOptionUsage(" --loggroup LOG_GROUP\n", " Sets the LogGroup field with the specified value for all\n" " events in the trace output (defaults to `default').\n"); - printf(" --trace_format FORMAT\n" + printOptionUsage(" --trace_format FORMAT\n", " Select the format of the log files. xml (the default) and json\n" " are supported.\n"); - printf(" --tracer TRACER\n" + printOptionUsage(" --tracer TRACER\n", " Select a tracer for transaction tracing. Currently disabled\n" " (the default) and log_file are supported.\n"); - printf(" -i ID, --machine_id ID\n" + printOptionUsage(" -i ID, --machine_id ID\n", " Machine and zone identifier key (up to 16 hex characters).\n" " Defaults to a random value shared by all fdbserver processes\n" " on this machine.\n"); - printf(" -a ID, --datacenter_id ID\n" + printOptionUsage(" -a ID, --datacenter_id ID\n", " Data center identifier key (up to 16 hex characters).\n"); - printf(" --locality_LOCALITYKEY LOCALITYVALUE\n" + printOptionUsage(" --locality_LOCALITYKEY LOCALITYVALUE\n", " Define a locality key. LOCALITYKEY is case-insensitive though\n" " LOCALITYVALUE is not.\n"); - printf(" -m SIZE, --memory SIZE\n" + printOptionUsage(" -m SIZE, --memory SIZE\n", " Memory limit. The default value is 8GiB. When specified\n" " without a unit, MiB is assumed.\n"); - printf(" -M SIZE, --storage_memory SIZE\n" + printOptionUsage(" -M SIZE, --storage_memory SIZE\n", " Maximum amount of memory used for storage. The default\n" " value is 1GiB. When specified without a unit, MB is\n" " assumed.\n"); - printf(" --cache_memory SIZE\n" + printOptionUsage(" --cache_memory SIZE\n", " The amount of memory to use for caching disk pages.\n" " The default value is 2GiB. When specified without a unit,\n" " MiB is assumed.\n"); - printf(" -c CLASS, --class CLASS\n" + printOptionUsage(" -c CLASS, --class CLASS\n", " Machine class (valid options are storage, transaction,\n" - " resolution, grv_proxy, proxy, master, test, unset, stateless, log, router,\n" + " resolution, grv_proxy, commit_proxy, master, test, unset, stateless, log, router,\n" " and cluster_controller).\n"); #ifndef TLS_DISABLED printf(TLS_HELP); #endif - printf(" -v, --version Print version information and exit.\n"); - printf(" -h, -?, --help Display this help and exit.\n"); + printOptionUsage(" -v, --version", "Print version information and exit.\n"); + printOptionUsage(" -h, -?, --help", "Display this help and exit.\n"); if( devhelp ) { - printf(" -r ROLE, --role ROLE\n" - " Server role (valid options are fdbd, test, multitest,\n"); - printf(" simulation, networktestclient, networktestserver, restore\n"); - printf(" consistencycheck, kvfileintegritycheck, kvfilegeneratesums). The default is `fdbd'.\n"); + printOptionUsage(" -r ROLE, --role ROLE\n", + " Server role (valid options are fdbd, test, multitest,\n" + " simulation, networktestclient, networktestserver, restore\n" + " consistencycheck, kvfileintegritycheck, kvfilegeneratesums). The default is `fdbd'.\n"); #ifdef _WIN32 - printf(" -n, --newconsole\n" + printOptionUsage(" -n, --newconsole\n", " Create a new console.\n"); - printf(" -q, --no_dialog\n" + printOptionUsage(" -q, --no_dialog\n", " Disable error dialog on crash.\n"); - printf(" --parentpid PID\n"); - printf(" Specify a process after whose termination to exit.\n"); + printOptionUsage(" --parentpid PID\n", + " Specify a process after whose termination to exit.\n"); #endif - printf(" -f TESTFILE, --testfile\n" + printOptionUsage(" -f TESTFILE, --testfile\n", " Testfile to run, defaults to `tests/default.txt'.\n"); - printf(" -R, --restarting\n"); - printf(" Restart a previous simulation that was cleanly shut down.\n"); - printf(" -s SEED, --seed SEED\n" + printOptionUsage(" -R, --restarting\n", + " Restart a previous simulation that was cleanly shut down.\n"); + printOptionUsage(" -s SEED, --seed SEED\n", " Random seed.\n"); - printf(" -k KEY, --key KEY Target key for search role.\n"); - printf(" --kvfile FILE Input file (SQLite database file) for use by the 'kvfilegeneratesums' and 'kvfileintegritycheck' roles.\n"); - printf(" -b [on,off], --buggify [on,off]\n" + printOptionUsage(" -k KEY, --key KEY", "Target key for search role.\n"); + printOptionUsage(" --kvfile FILE", + "Input file (SQLite database file) for use by the 'kvfilegeneratesums' and 'kvfileintegritycheck' roles.\n"); + printOptionUsage(" -b [on,off], --buggify [on,off]\n", " Sets Buggify system state, defaults to `off'.\n"); - printf(" --crash Crash on serious errors instead of continuing.\n"); - printf(" -N NETWORKIMPL, --network NETWORKIMPL\n" - " Select network implementation, `net2' (default),\n"); - printf(" `net2-threadpool'.\n"); - printf(" --unbufferedout\n"); - printf(" Do not buffer stdout and stderr.\n"); - printf(" --bufferedout\n"); - printf(" Buffer stdout and stderr.\n"); - printf(" --traceclock CLOCKIMPL\n"); - printf(" Select clock source for trace files, `now' (default) or\n"); - printf(" `realtime'.\n"); - printf(" --num_testers NUM\n"); - printf(" A multitester will wait for NUM testers before starting\n"); - printf(" (defaults to 1).\n"); + printOptionUsage(" --crash", "Crash on serious errors instead of continuing.\n"); + printOptionUsage(" -N NETWORKIMPL, --network NETWORKIMPL\n", + " Select network implementation, `net2' (default),\n" + " `net2-threadpool'.\n"); + printOptionUsage(" --unbufferedout\n", + " Do not buffer stdout and stderr.\n"); + printOptionUsage(" --bufferedout\n", + " Buffer stdout and stderr.\n"); + printOptionUsage(" --traceclock CLOCKIMPL\n", + " Select clock source for trace files, `now' (default) or\n" + " `realtime'.\n"); + printOptionUsage(" --num_testers NUM\n", + " A multitester will wait for NUM testers before starting\n" + " (defaults to 1).\n"); #ifdef __linux__ - printf(" --rsssize SIZE\n" + printOptionUsage(" --rsssize SIZE\n" " Turns on automatic heap profiling when RSS memory size exceeds\n" " the given threshold. fdbserver needs to be compiled with\n" " USE_GPERFTOOLS flag in order to use this feature.\n"); #endif - printf(" --testservers ADDRESSES\n"); - printf(" The addresses of networktestservers\n"); - printf(" specified as ADDRESS:PORT,ADDRESS:PORT...\n"); - printf(" --testonservers\n"); - printf(" Testers are recruited on servers.\n"); - printf(" --metrics_cluster CONNFILE\n"); - printf(" The cluster file designating where this process will\n"); - printf(" store its metric data. By default metrics will be stored\n"); - printf(" in the same database the process is participating in.\n"); - printf(" --metrics_prefix PREFIX\n"); - printf(" The prefix where this process will store its metric data.\n"); - printf(" Must be specified if using a different database for metrics.\n"); - printf(" --knob_KNOBNAME KNOBVALUE\n"); - printf(" Changes a database knob. KNOBNAME should be lowercase.\n"); - printf(" --io_trust_seconds SECONDS\n"); - printf(" Sets the time in seconds that a read or write operation is allowed to take\n" + printOptionUsage(" --testservers ADDRESSES\n", + " The addresses of networktestservers\n" + " specified as ADDRESS:PORT,ADDRESS:PORT...\n"); + printOptionUsage(" --testonservers\n", + " Testers are recruited on servers.\n"); + printOptionUsage(" --metrics_cluster CONNFILE\n", + " The cluster file designating where this process will\n" + " store its metric data. By default metrics will be stored\n" + " in the same database the process is participating in.\n"); + printOptionUsage(" --metrics_prefix PREFIX\n", + " The prefix where this process will store its metric data.\n" + " Must be specified if using a different database for metrics.\n"); + printOptionUsage(" --knob_KNOBNAME KNOBVALUE\n", + " Changes a database knob. KNOBNAME should be lowercase.\n"); + printOptionUsage(" --io_trust_seconds SECONDS\n", + " Sets the time in seconds that a read or write operation is allowed to take\n" " before timing out with an error. If an operation times out, all future\n" " operations on that file will fail with an error as well. Only has an effect\n" " when using AsyncFileKAIO in Linux.\n"); - printf(" --io_trust_warn_only\n"); - printf(" Instead of failing when an I/O operation exceeds io_trust_seconds, just\n" + printOptionUsage(" --io_trust_warn_only\n", + " Instead of failing when an I/O operation exceeds io_trust_seconds, just\n" " log a warning to the trace log. Has no effect if io_trust_seconds is unspecified.\n"); } else { - printf(" --dev-help Display developer-specific help and exit.\n"); + printOptionUsage(" --dev-help", "Display developer-specific help and exit.\n"); } printf("\n" From 2bf4ebbd5118e74417911eb00561afa3c0c685f8 Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Mon, 21 Sep 2020 20:43:07 -0700 Subject: [PATCH 3/8] fixup! Add missing comma --- fdbserver/fdbserver.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index d770a86746..aaa9c43936 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -631,7 +631,7 @@ static void printUsage( const char *name, bool devhelp ) { " A multitester will wait for NUM testers before starting\n" " (defaults to 1).\n"); #ifdef __linux__ - printOptionUsage(" --rsssize SIZE\n" + printOptionUsage(" --rsssize SIZE\n", " Turns on automatic heap profiling when RSS memory size exceeds\n" " the given threshold. fdbserver needs to be compiled with\n" " USE_GPERFTOOLS flag in order to use this feature.\n"); From e2ea6bbdd075b86e4d57175310498419970e5f05 Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Wed, 23 Sep 2020 11:19:02 -0700 Subject: [PATCH 4/8] fixup! Use currLine.size() for the first currLength in printOptionUsage --- fdbserver/fdbserver.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index aaa9c43936..5ba90133e4 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -494,7 +494,7 @@ static void printOptionUsage(std::string option, std::string description) { sstream >> currWord; std::string currLine(DESCRIPTION_INDENT + ' ' + currWord); - int currLength = DESCRIPTION_INDENT.size(); + int currLength = currLine.size(); while(!sstream.eof()) { sstream >> currWord; From 285e2594ef7e69cd00ff10d313e60eda8705a295 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 23 Sep 2020 14:28:47 -0400 Subject: [PATCH 5/8] adjusted delay and timing of version gate --- .../workloads/IncrementalBackup.actor.cpp | 49 +++++++++++++------ tests/fast/IncrementalBackup.toml | 6 ++- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/fdbserver/workloads/IncrementalBackup.actor.cpp b/fdbserver/workloads/IncrementalBackup.actor.cpp index 1f2cfd866e..4e748f99f2 100644 --- a/fdbserver/workloads/IncrementalBackup.actor.cpp +++ b/fdbserver/workloads/IncrementalBackup.actor.cpp @@ -54,11 +54,37 @@ struct IncrementalBackupWorkload : TestWorkload { return _start(cx, this); } - virtual Future check(Database const& cx) { return true; } + virtual Future check(Database const& cx) { + if (clientId || !waitVersion) { + return true; + } + return _check(cx, this); + } + + ACTOR static Future _check(Database cx, IncrementalBackupWorkload* self) { + state Reference backupContainer; + state UID backupUID; + int waitResult = + wait(self->backupAgent.waitBackup(cx, self->tag.toString(), false, &backupContainer, &backupUID)); + TraceEvent("IBackupCheckWaitResult").detail("Result", waitResult); + state Reference tr(new ReadYourWritesTransaction(cx)); + state Version v = wait(tr->getReadVersion()); + loop { + BackupDescription desc = wait(backupContainer->describeBackup(true)); + TraceEvent("IBackupVersionGate") + .detail("MaxLogEndVersion", desc.maxLogEnd.present() ? desc.maxLogEnd.get() : invalidVersion) + .detail("ContiguousLogEndVersion", + desc.contiguousLogEnd.present() ? desc.contiguousLogEnd.get() : invalidVersion) + .detail("TargetVersion", v); + if (!desc.contiguousLogEnd.present()) continue; + if (desc.contiguousLogEnd.get() >= v) break; + // Avoid spamming requests with a delay + wait(delay(5.0)); + } + return true; + } ACTOR static Future _start(Database cx, IncrementalBackupWorkload* self) { - // Add a commit both before the submit and restore to test that incremental backup - // can be performed on non-empty database if (self->submitOnly) { Standalone> backupRanges; backupRanges.push_back_deep(backupRanges.arena(), normalKeys); @@ -66,7 +92,12 @@ struct IncrementalBackupWorkload : TestWorkload { try { wait(self->backupAgent.submitBackup(cx, self->backupDir, 1e8, self->tag.toString(), backupRanges, false, false, true)); + // Wait for backup container to be created and avoid race condition + wait(delay(60.0)); + int waitResult = wait(self->backupAgent.waitBackup(cx, self->tag.toString(), false)); + TraceEvent("IBackupSubmitWaitResult").detail("Result", waitResult); } catch (Error& e) { + TraceEvent("IBackupSubmitError").error(e); if (e.code() != error_code_backup_duplicate) { throw; } @@ -78,18 +109,6 @@ struct IncrementalBackupWorkload : TestWorkload { state UID backupUID; TraceEvent("IBackupRestoreAttempt"); wait(success(self->backupAgent.waitBackup(cx, self->tag.toString(), false, &backupContainer, &backupUID))); - // TODO: add testing scenario for atomics and beginVersion - if (self->waitVersion) { - state Reference tr(new ReadYourWritesTransaction(cx)); - state Version v = wait(tr->getReadVersion()); - loop { - BackupDescription desc = wait(backupContainer->describeBackup()); - if (desc.maxLogEnd.get() >= v) break; - // Avoid spamming requests with a delay - wait(delay(3.0)); - } - - } wait(success(self->backupAgent.restore(cx, cx, Key(self->tag.toString()), Key(backupContainer->getURL()), true, -1, true, normalKeys, Key(), Key(), true, true))); TraceEvent("IBackupRestoreSuccess"); diff --git a/tests/fast/IncrementalBackup.toml b/tests/fast/IncrementalBackup.toml index 90c5898d7b..10a97e447d 100644 --- a/tests/fast/IncrementalBackup.toml +++ b/tests/fast/IncrementalBackup.toml @@ -19,6 +19,11 @@ simBackupAgents = 'BackupToFile' testDuration = 10.0 expectedRate = 0 + [[test.workload]] + testName = 'IncrementalBackup' + tag = 'default' + waitVersion = true + [[test]] testTitle = 'SubmitRestore' clearAfterTest = false @@ -28,7 +33,6 @@ simBackupAgents = 'BackupToFile' testName = 'IncrementalBackup' tag = 'default' restoreOnly = true - waitVersion = true [[test]] From 581427c88079fe5b582afd925346eb02d37c1080 Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Wed, 23 Sep 2020 12:03:09 -0700 Subject: [PATCH 6/8] fixup! Remove "\n" and extra spaces in printUsage --- fdbserver/fdbserver.actor.cpp | 268 +++++++++++++++++----------------- 1 file changed, 134 insertions(+), 134 deletions(-) diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 5ba90133e4..5aa7a9e268 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -515,151 +515,151 @@ static void printOptionUsage(std::string option, std::string description) { static void printUsage( const char *name, bool devhelp ) { printf("FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n"); printf("Usage: %s -p ADDRESS [OPTIONS]\n\n", name); - printOptionUsage(" -p ADDRESS, --public_address ADDRESS", - " Public address, specified as `IP_ADDRESS:PORT' or `auto:PORT'.\n"); - printOptionUsage(" -l ADDRESS, --listen_address ADDRESS\n", - " Listen address, specified as `IP_ADDRESS:PORT' (defaults to\n" - " public address).\n"); - printOptionUsage(" -C CONNFILE, --cluster_file CONNFILE\n", - " The path of a file containing the connection string for the\n" - " FoundationDB cluster. The default is first the value of the\n" - " FDB_CLUSTER_FILE environment variable, then `./fdb.cluster',\n" - " then `" + platform::getDefaultClusterFilePath() + "'."); - printOptionUsage(" --seed_cluster_file SEEDCONNFILE\n", - " The path of a seed cluster file which will be used to connect\n" - " if the -C cluster file does not exist. If the server connects\n" - " successfully using the seed file, then it copies the file to\n" - " the -C file location.\n"); - printOptionUsage(" --seed_connection_string SEEDCONNSTRING\n", - " The path of a seed connection string which will be used to connect\n" - " if the -C cluster file does not exist. If the server connects\n" - " successfully using the seed string, then it copies the string to\n" - " the -C file location.\n"); + printOptionUsage("-p ADDRESS, --public_address ADDRESS", + " Public address, specified as `IP_ADDRESS:PORT' or `auto:PORT'."); + printOptionUsage("-l ADDRESS, --listen_address ADDRESS", + " Listen address, specified as `IP_ADDRESS:PORT' (defaults to" + " public address)."); + printOptionUsage("-C CONNFILE, --cluster_file CONNFILE", + " The path of a file containing the connection string for the" + " FoundationDB cluster. The default is first the value of the" + " FDB_CLUSTER_FILE environment variable, then `./fdb.cluster'," + " then `" + platform::getDefaultClusterFilePath() + "'."); + printOptionUsage("--seed_cluster_file SEEDCONNFILE", + " The path of a seed cluster file which will be used to connect" + " if the -C cluster file does not exist. If the server connects" + " successfully using the seed file, then it copies the file to" + " the -C file location."); + printOptionUsage("--seed_connection_string SEEDCONNSTRING", + " The path of a seed connection string which will be used to connect" + " if the -C cluster file does not exist. If the server connects" + " successfully using the seed string, then it copies the string to" + " the -C file location."); #ifdef __linux__ - printOptionUsage(" --data_filesystem PATH\n", - " Turns on validation that all data files are written to a drive\n" - " mounted at the specified PATH. This checks that the device at PATH\n" - " is currently mounted and that any data files get written to the\n" - " same device.\n"); + printOptionUsage("--data_filesystem PATH", + " Turns on validation that all data files are written to a drive" + " mounted at the specified PATH. This checks that the device at PATH" + " is currently mounted and that any data files get written to the" + " same device."); #endif - printOptionUsage(" -d PATH, --datadir PATH\n", - " Store data files in the given folder (must be unique for each\n" - " fdbserver instance on a given machine).\n"); - printOptionUsage(" -L PATH, --logdir PATH\n", - " Store log files in the given folder (default is `.').\n"); - printOptionUsage(" --logsize SIZE", - "Roll over to a new log file after the current log file\n" - " exceeds SIZE bytes. The default value is 10MiB.\n"); - printOptionUsage(" --maxlogs SIZE, --maxlogssize SIZE\n", - " Delete the oldest log file when the total size of all log\n" - " files exceeds SIZE bytes. If set to 0, old log files will not\n" - " be deleted. The default value is 100MiB.\n"); - printOptionUsage(" --loggroup LOG_GROUP\n", - " Sets the LogGroup field with the specified value for all\n" - " events in the trace output (defaults to `default').\n"); - printOptionUsage(" --trace_format FORMAT\n", - " Select the format of the log files. xml (the default) and json\n" - " are supported.\n"); - printOptionUsage(" --tracer TRACER\n", - " Select a tracer for transaction tracing. Currently disabled\n" - " (the default) and log_file are supported.\n"); - printOptionUsage(" -i ID, --machine_id ID\n", - " Machine and zone identifier key (up to 16 hex characters).\n" - " Defaults to a random value shared by all fdbserver processes\n" - " on this machine.\n"); - printOptionUsage(" -a ID, --datacenter_id ID\n", - " Data center identifier key (up to 16 hex characters).\n"); - printOptionUsage(" --locality_LOCALITYKEY LOCALITYVALUE\n", - " Define a locality key. LOCALITYKEY is case-insensitive though\n" - " LOCALITYVALUE is not.\n"); - printOptionUsage(" -m SIZE, --memory SIZE\n", - " Memory limit. The default value is 8GiB. When specified\n" - " without a unit, MiB is assumed.\n"); - printOptionUsage(" -M SIZE, --storage_memory SIZE\n", - " Maximum amount of memory used for storage. The default\n" - " value is 1GiB. When specified without a unit, MB is\n" - " assumed.\n"); - printOptionUsage(" --cache_memory SIZE\n", - " The amount of memory to use for caching disk pages.\n" - " The default value is 2GiB. When specified without a unit,\n" - " MiB is assumed.\n"); - printOptionUsage(" -c CLASS, --class CLASS\n", - " Machine class (valid options are storage, transaction,\n" - " resolution, grv_proxy, commit_proxy, master, test, unset, stateless, log, router,\n" - " and cluster_controller).\n"); + printOptionUsage("-d PATH, --datadir PATH", + " Store data files in the given folder (must be unique for each" + " fdbserver instance on a given machine)."); + printOptionUsage("-L PATH, --logdir PATH", + " Store log files in the given folder (default is `.')."); + printOptionUsage("--logsize SIZE", + "Roll over to a new log file after the current log file" + " exceeds SIZE bytes. The default value is 10MiB."); + printOptionUsage("--maxlogs SIZE, --maxlogssize SIZE", + " Delete the oldest log file when the total size of all log" + " files exceeds SIZE bytes. If set to 0, old log files will not" + " be deleted. The default value is 100MiB."); + printOptionUsage("--loggroup LOG_GROUP", + " Sets the LogGroup field with the specified value for all" + " events in the trace output (defaults to `default')."); + printOptionUsage("--trace_format FORMAT", + " Select the format of the log files. xml (the default) and json" + " are supported."); + printOptionUsage("--tracer TRACER", + " Select a tracer for transaction tracing. Currently disabled" + " (the default) and log_file are supported."); + printOptionUsage("-i ID, --machine_id ID", + " Machine and zone identifier key (up to 16 hex characters)." + " Defaults to a random value shared by all fdbserver processes" + " on this machine."); + printOptionUsage("-a ID, --datacenter_id ID", + " Data center identifier key (up to 16 hex characters)."); + printOptionUsage("--locality_LOCALITYKEY LOCALITYVALUE", + " Define a locality key. LOCALITYKEY is case-insensitive though" + " LOCALITYVALUE is not."); + printOptionUsage("-m SIZE, --memory SIZE", + " Memory limit. The default value is 8GiB. When specified" + " without a unit, MiB is assumed."); + printOptionUsage("-M SIZE, --storage_memory SIZE", + " Maximum amount of memory used for storage. The default" + " value is 1GiB. When specified without a unit, MB is" + " assumed."); + printOptionUsage("--cache_memory SIZE", + " The amount of memory to use for caching disk pages." + " The default value is 2GiB. When specified without a unit," + " MiB is assumed."); + printOptionUsage("-c CLASS, --class CLASS", + " Machine class (valid options are storage, transaction," + " resolution, grv_proxy, commit_proxy, master, test, unset, stateless, log, router," + " and cluster_controller)."); #ifndef TLS_DISABLED printf(TLS_HELP); #endif - printOptionUsage(" -v, --version", "Print version information and exit.\n"); - printOptionUsage(" -h, -?, --help", "Display this help and exit.\n"); + printOptionUsage("-v, --version", "Print version information and exit."); + printOptionUsage("-h, -?, --help", "Display this help and exit."); if( devhelp ) { - printOptionUsage(" -r ROLE, --role ROLE\n", - " Server role (valid options are fdbd, test, multitest,\n" - " simulation, networktestclient, networktestserver, restore\n" - " consistencycheck, kvfileintegritycheck, kvfilegeneratesums). The default is `fdbd'.\n"); + printOptionUsage("-r ROLE, --role ROLE", + " Server role (valid options are fdbd, test, multitest," + " simulation, networktestclient, networktestserver, restore" + " consistencycheck, kvfileintegritycheck, kvfilegeneratesums). The default is `fdbd'."); #ifdef _WIN32 - printOptionUsage(" -n, --newconsole\n", - " Create a new console.\n"); - printOptionUsage(" -q, --no_dialog\n", - " Disable error dialog on crash.\n"); - printOptionUsage(" --parentpid PID\n", - " Specify a process after whose termination to exit.\n"); + printOptionUsage("-n, --newconsole", + " Create a new console."); + printOptionUsage("-q, --no_dialog", + " Disable error dialog on crash."); + printOptionUsage("--parentpid PID", + " Specify a process after whose termination to exit."); #endif - printOptionUsage(" -f TESTFILE, --testfile\n", - " Testfile to run, defaults to `tests/default.txt'.\n"); - printOptionUsage(" -R, --restarting\n", - " Restart a previous simulation that was cleanly shut down.\n"); - printOptionUsage(" -s SEED, --seed SEED\n", - " Random seed.\n"); - printOptionUsage(" -k KEY, --key KEY", "Target key for search role.\n"); - printOptionUsage(" --kvfile FILE", - "Input file (SQLite database file) for use by the 'kvfilegeneratesums' and 'kvfileintegritycheck' roles.\n"); - printOptionUsage(" -b [on,off], --buggify [on,off]\n", - " Sets Buggify system state, defaults to `off'.\n"); - printOptionUsage(" --crash", "Crash on serious errors instead of continuing.\n"); - printOptionUsage(" -N NETWORKIMPL, --network NETWORKIMPL\n", - " Select network implementation, `net2' (default),\n" - " `net2-threadpool'.\n"); - printOptionUsage(" --unbufferedout\n", - " Do not buffer stdout and stderr.\n"); - printOptionUsage(" --bufferedout\n", - " Buffer stdout and stderr.\n"); - printOptionUsage(" --traceclock CLOCKIMPL\n", - " Select clock source for trace files, `now' (default) or\n" - " `realtime'.\n"); - printOptionUsage(" --num_testers NUM\n", - " A multitester will wait for NUM testers before starting\n" - " (defaults to 1).\n"); + printOptionUsage("-f TESTFILE, --testfile", + " Testfile to run, defaults to `tests/default.txt'."); + printOptionUsage("-R, --restarting", + " Restart a previous simulation that was cleanly shut down."); + printOptionUsage("-s SEED, --seed SEED", + " Random seed."); + printOptionUsage("-k KEY, --key KEY", "Target key for search role."); + printOptionUsage("--kvfile FILE", + "Input file (SQLite database file) for use by the 'kvfilegeneratesums' and 'kvfileintegritycheck' roles."); + printOptionUsage("-b [on,off], --buggify [on,off]", + " Sets Buggify system state, defaults to `off'."); + printOptionUsage("--crash", "Crash on serious errors instead of continuing."); + printOptionUsage("-N NETWORKIMPL, --network NETWORKIMPL", + " Select network implementation, `net2' (default)," + " `net2-threadpool'."); + printOptionUsage("--unbufferedout", + " Do not buffer stdout and stderr."); + printOptionUsage("--bufferedout", + " Buffer stdout and stderr."); + printOptionUsage("--traceclock CLOCKIMPL", + " Select clock source for trace files, `now' (default) or" + " `realtime'."); + printOptionUsage("--num_testers NUM", + " A multitester will wait for NUM testers before starting" + " (defaults to 1)."); #ifdef __linux__ - printOptionUsage(" --rsssize SIZE\n", - " Turns on automatic heap profiling when RSS memory size exceeds\n" - " the given threshold. fdbserver needs to be compiled with\n" - " USE_GPERFTOOLS flag in order to use this feature.\n"); + printOptionUsage("--rsssize SIZE", + " Turns on automatic heap profiling when RSS memory size exceeds" + " the given threshold. fdbserver needs to be compiled with" + " USE_GPERFTOOLS flag in order to use this feature."); #endif - printOptionUsage(" --testservers ADDRESSES\n", - " The addresses of networktestservers\n" - " specified as ADDRESS:PORT,ADDRESS:PORT...\n"); - printOptionUsage(" --testonservers\n", - " Testers are recruited on servers.\n"); - printOptionUsage(" --metrics_cluster CONNFILE\n", - " The cluster file designating where this process will\n" - " store its metric data. By default metrics will be stored\n" - " in the same database the process is participating in.\n"); - printOptionUsage(" --metrics_prefix PREFIX\n", - " The prefix where this process will store its metric data.\n" - " Must be specified if using a different database for metrics.\n"); - printOptionUsage(" --knob_KNOBNAME KNOBVALUE\n", - " Changes a database knob. KNOBNAME should be lowercase.\n"); - printOptionUsage(" --io_trust_seconds SECONDS\n", - " Sets the time in seconds that a read or write operation is allowed to take\n" - " before timing out with an error. If an operation times out, all future\n" - " operations on that file will fail with an error as well. Only has an effect\n" - " when using AsyncFileKAIO in Linux.\n"); - printOptionUsage(" --io_trust_warn_only\n", - " Instead of failing when an I/O operation exceeds io_trust_seconds, just\n" - " log a warning to the trace log. Has no effect if io_trust_seconds is unspecified.\n"); + printOptionUsage("--testservers ADDRESSES", + " The addresses of networktestservers" + " specified as ADDRESS:PORT,ADDRESS:PORT..."); + printOptionUsage("--testonservers", + " Testers are recruited on servers."); + printOptionUsage("--metrics_cluster CONNFILE", + " The cluster file designating where this process will" + " store its metric data. By default metrics will be stored" + " in the same database the process is participating in."); + printOptionUsage("--metrics_prefix PREFIX", + " The prefix where this process will store its metric data." + " Must be specified if using a different database for metrics."); + printOptionUsage("--knob_KNOBNAME KNOBVALUE", + " Changes a database knob. KNOBNAME should be lowercase."); + printOptionUsage("--io_trust_seconds SECONDS", + " Sets the time in seconds that a read or write operation is allowed to take" + " before timing out with an error. If an operation times out, all future" + " operations on that file will fail with an error as well. Only has an effect" + " when using AsyncFileKAIO in Linux."); + printOptionUsage("--io_trust_warn_only", + " Instead of failing when an I/O operation exceeds io_trust_seconds, just" + " log a warning to the trace log. Has no effect if io_trust_seconds is unspecified."); } else { - printOptionUsage(" --dev-help", "Display developer-specific help and exit.\n"); + printOptionUsage("--dev-help", "Display developer-specific help and exit."); } printf("\n" From 7553daba2040a5f9b143835ee7e3f1d83f9379f6 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 23 Sep 2020 16:13:30 -0400 Subject: [PATCH 7/8] change waitVersion to waitForBackup --- fdbserver/workloads/IncrementalBackup.actor.cpp | 6 +++--- tests/fast/IncrementalBackup.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fdbserver/workloads/IncrementalBackup.actor.cpp b/fdbserver/workloads/IncrementalBackup.actor.cpp index 4e748f99f2..9e862b8c64 100644 --- a/fdbserver/workloads/IncrementalBackup.actor.cpp +++ b/fdbserver/workloads/IncrementalBackup.actor.cpp @@ -33,14 +33,14 @@ struct IncrementalBackupWorkload : TestWorkload { FileBackupAgent backupAgent; bool submitOnly; bool restoreOnly; - bool waitVersion; + bool waitForBackup; IncrementalBackupWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { backupDir = getOption(options, LiteralStringRef("backupDir"), LiteralStringRef("file://simfdb/backups/")); tag = getOption(options, LiteralStringRef("tag"), LiteralStringRef("default")); submitOnly = getOption(options, LiteralStringRef("submitOnly"), false); restoreOnly = getOption(options, LiteralStringRef("restoreOnly"), false); - waitVersion = getOption(options, LiteralStringRef("waitVersion"), false); + waitForBackup = getOption(options, LiteralStringRef("waitForBackup"), false); } virtual std::string description() { return "IncrementalBackup"; } @@ -55,7 +55,7 @@ struct IncrementalBackupWorkload : TestWorkload { } virtual Future check(Database const& cx) { - if (clientId || !waitVersion) { + if (clientId || !waitForBackup) { return true; } return _check(cx, this); diff --git a/tests/fast/IncrementalBackup.toml b/tests/fast/IncrementalBackup.toml index 10a97e447d..bbc2c87e0f 100644 --- a/tests/fast/IncrementalBackup.toml +++ b/tests/fast/IncrementalBackup.toml @@ -22,7 +22,7 @@ simBackupAgents = 'BackupToFile' [[test.workload]] testName = 'IncrementalBackup' tag = 'default' - waitVersion = true + waitForBackup = true [[test]] testTitle = 'SubmitRestore' From ebad1cd9e62d5741b976d132afc15cb60ae83adb Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Thu, 24 Sep 2020 12:31:21 -0400 Subject: [PATCH 8/8] change waitBackup result from int to EBackupState --- fdbserver/workloads/IncrementalBackup.actor.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fdbserver/workloads/IncrementalBackup.actor.cpp b/fdbserver/workloads/IncrementalBackup.actor.cpp index 9e862b8c64..c91228d883 100644 --- a/fdbserver/workloads/IncrementalBackup.actor.cpp +++ b/fdbserver/workloads/IncrementalBackup.actor.cpp @@ -64,9 +64,9 @@ struct IncrementalBackupWorkload : TestWorkload { ACTOR static Future _check(Database cx, IncrementalBackupWorkload* self) { state Reference backupContainer; state UID backupUID; - int waitResult = + EBackupState waitResult = wait(self->backupAgent.waitBackup(cx, self->tag.toString(), false, &backupContainer, &backupUID)); - TraceEvent("IBackupCheckWaitResult").detail("Result", waitResult); + TraceEvent("IBackupCheckWaitResult").detail("Result", BackupAgentBase::getStateText(waitResult)); state Reference tr(new ReadYourWritesTransaction(cx)); state Version v = wait(tr->getReadVersion()); loop { @@ -94,8 +94,8 @@ struct IncrementalBackupWorkload : TestWorkload { false, true)); // Wait for backup container to be created and avoid race condition wait(delay(60.0)); - int waitResult = wait(self->backupAgent.waitBackup(cx, self->tag.toString(), false)); - TraceEvent("IBackupSubmitWaitResult").detail("Result", waitResult); + EBackupState waitResult = wait(self->backupAgent.waitBackup(cx, self->tag.toString(), false)); + TraceEvent("IBackupSubmitWaitResult").detail("Result", BackupAgentBase::getStateText(waitResult)); } catch (Error& e) { TraceEvent("IBackupSubmitError").error(e); if (e.code() != error_code_backup_duplicate) {