From 7caa012fbf2d4d8c7acc1223fb119ab60f350eb3 Mon Sep 17 00:00:00 2001 From: Stephen Atherton <s_atherton@apple.com> Date: Wed, 20 Dec 2017 00:49:08 -0800 Subject: [PATCH] Added snapshot interval option to "fdbbackup start" which defaults to a new knob's value. Added snapshot info to backup status text. Improvements to fdbbackup help. --- fdbbackup/backup.actor.cpp | 29 +++++++++++++++++++++-------- fdbclient/FileBackupAgent.actor.cpp | 26 +++++++++++++++++++++----- fdbclient/Knobs.cpp | 1 + fdbclient/Knobs.h | 1 + flow/genericactors.actor.h | 5 +++++ 5 files changed, 49 insertions(+), 13 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 582f9021df..8d69306927 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -90,7 +90,7 @@ enum enumRestoreType { // enum { // Backup constants - OPT_DESTCONTAINER, OPT_ERRORLIMIT, OPT_NOSTOPWHENDONE, OPT_EXPVERSION, OPT_BASEURL, OPT_DATETIME, + OPT_DESTCONTAINER, OPT_SNAPSHOTINTERVAL, OPT_ERRORLIMIT, OPT_NOSTOPWHENDONE, OPT_EXPVERSION, OPT_BASEURL, OPT_DATETIME, // Backup and Restore constants OPT_TAGNAME, OPT_BACKUPKEYS, OPT_WAITFORDONE, @@ -148,6 +148,8 @@ CSimpleOpt::SOption g_rgBackupStartOptions[] = { { OPT_NOSTOPWHENDONE, "--no-stop-when-done",SO_NONE }, { OPT_DESTCONTAINER, "-d", SO_REQ_SEP }, { OPT_DESTCONTAINER, "--destcontainer", SO_REQ_SEP }, + { OPT_SNAPSHOTINTERVAL, "-s", SO_REQ_SEP }, + { OPT_SNAPSHOTINTERVAL, "--snapshot_interval", SO_REQ_SEP }, { OPT_TAGNAME, "-t", SO_REQ_SEP }, { OPT_TAGNAME, "--tagname", SO_REQ_SEP }, { OPT_BACKUPKEYS, "-k", SO_REQ_SEP }, @@ -686,10 +688,11 @@ static void printBackupUsage(bool devhelp) { " FDB_CLUSTER_FILE environment variable, then `./fdb.cluster',\n" " then `%s'.\n", platform::getDefaultClusterFilePath().c_str()); printf(" -D, --date DATETIME\n" - " Delete all data from the beginning to the given date and time in YYYY-MM-DD.HH:MI:SS format (UTC).\n"); + " For expire operations, delete all data prior to (approximately) 
the given timestamp in YYYY-MM-DD.HH:MI:SS format (UTC).\n"); printf(" -d, --destcontainer URL\n" - " The Backup URL for the destination of this backup.\n"); + " The Backup URL for the operation.\n"); printBackupContainerInfo(); + printf(" -s DURATION When starting a backup, use snapshot interval DURATION in seconds. Defaults to %d.\n", CLIENT_KNOBS->BACKUP_DEFAULT_SNAPSHOT_INTERVAL_SEC); printf(" -e ERRORLIMIT The maximum number of errors printed by status (default is 10).\n"); printf(" -k KEYS List of key ranges to backup.\n" " If not specified, the entire database will be backed up.\n"); @@ -1361,7 +1364,7 @@ ACTOR Future<Void> submitDBBackup(Database src, Database dest, Standalone<Vector return Void(); } -ACTOR Future<Void> submitBackup(Database db, std::string destinationDir, Standalone<VectorRef<KeyRangeRef>> backupRanges, std::string tagName, bool dryRun, bool waitForCompletion, bool stopWhenDone) { +ACTOR Future<Void> submitBackup(Database db, std::string url, int snapshotIntervalSeconds, Standalone<VectorRef<KeyRangeRef>> backupRanges, std::string tagName, bool dryRun, bool waitForCompletion, bool stopWhenDone) { try { state FileBackupAgent backupAgent; @@ -1406,7 +1409,7 @@ ACTOR Future<Void> submitBackup(Database db, std::string destinationDir, Standal } else { - Void _ = wait(backupAgent.submitBackup(db, KeyRef(destinationDir), tagName, backupRanges, stopWhenDone)); + Void _ = wait(backupAgent.submitBackup(db, KeyRef(url), snapshotIntervalSeconds, tagName, backupRanges, stopWhenDone)); // Wait for the backup to complete, if requested if (waitForCompletion) { @@ -1796,7 +1799,7 @@ ACTOR Future<Void> expireBackupData(const char *name, std::string destinationCon try { Reference<IBackupContainer> c = openBackupContainer(name, destinationContainer); Void _ = wait(c->expireData(endVersion)); - printf("All data before version %lld are deleted\n", endVersion); + printf("All data before version %lld is deleted.\n", endVersion); } catch (Error& e) { if(e.code() 
== error_code_actor_cancelled) @@ -1821,7 +1824,7 @@ ACTOR Future<Void> deleteBackupContainer(const char *name, std::string destinati when ( Void _ = wait(delay(3)) ) { int numDeleted = 0; c->deleteContainer(&numDeleted); - printf("%d files have been deleted.\n", numDeleted); + printf("%d files have been deleted so far...\n", numDeleted); } } } @@ -2200,6 +2203,7 @@ int main(int argc, char* argv[]) { } std::string destinationContainer; + int snapshotIntervalSeconds = CLIENT_KNOBS->BACKUP_DEFAULT_SNAPSHOT_INTERVAL_SEC; std::string clusterFile; std::string sourceClusterFile; std::string baseUrl; @@ -2381,6 +2385,15 @@ int main(int argc, char* argv[]) { if(StringRef(destinationContainer).startsWith(LiteralStringRef("/"))) destinationContainer = std::string("file://") + destinationContainer; break; + case OPT_SNAPSHOTINTERVAL: { + const char* a = args->OptionArg(); + if (sscanf(a, "%d", &snapshotIntervalSeconds) != 1) { /* sscanf returns EOF (-1), not 0, on empty input, so require exactly one conversion */ + fprintf(stderr, "ERROR: Could not parse snapshot interval `%s'\n", a); + printHelpTeaser(argv[0]); + return FDB_EXIT_ERROR; + } + break; + } case OPT_WAITFORDONE: waitForDone = true; break; @@ -2665,7 +2678,7 @@ int main(int argc, char* argv[]) { { // Test out the backup url to make sure it parses. Doesn't test to make sure it's actually writeable. 
openBackupContainer(argv[0], destinationContainer); - f = stopAfter( submitBackup(db, destinationContainer, backupKeys, tagName, dryRun, waitForDone, stopWhenDone) ); + f = stopAfter( submitBackup(db, destinationContainer, snapshotIntervalSeconds, backupKeys, tagName, dryRun, waitForDone, stopWhenDone) ); break; } diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 819df0d38f..8628dee37e 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -37,11 +37,6 @@ const Key FileBackupAgent::keyLastRestorable = LiteralStringRef("last_restorable"); -template<class T> -Future<Void> store(Future<T> what, T &out) { - return map(what, [&out](T const &v) { out = v; return Void(); }); -} - // For convenience typedef FileBackupAgent::ERestoreState ERestoreState; @@ -3311,6 +3306,7 @@ public: state std::string backupStatus(BackupAgentBase::getStateText(backupState)); state Reference<IBackupContainer> bc = wait(config.backupContainer().getOrThrow(tr)); state Optional<Version> stopVersion = wait(config.getLatestRestorableVersion(tr)); + bool snapshotProgress = false; switch (backupState) { case BackupAgentBase::STATE_SUBMITTED: @@ -3318,9 +3314,11 @@ public: break; case BackupAgentBase::STATE_BACKUP: statusText += "The backup on tag `" + tagName + "' is in progress to " + bc->getURL() + ".\n"; + snapshotProgress = true; break; case BackupAgentBase::STATE_DIFFERENTIAL: statusText += "The backup on tag `" + tagName + "' is restorable but continuing to " + bc->getURL() + ".\n"; + snapshotProgress = true; break; case BackupAgentBase::STATE_COMPLETED: statusText += "The previous backup on tag `" + tagName + "' at " + bc->getURL() + " completed at version " + format("%lld", stopVersion.orDefault(-1)) + ".\n"; @@ -3329,6 +3327,24 @@ public: statusText += "The previous backup on tag `" + tagName + "' at " + bc->getURL() + " " + backupStatus + ".\n"; break; } + + if(snapshotProgress) { + state int64_t 
snapshotInterval; + state Version recentReadVersion; + state Version snapshotBeginVersion; + state Version snapshotTargetEndVersion; + + Void _ = wait(store(config.snapshotBeginVersion().getOrThrow(tr), snapshotBeginVersion) + && store(config.snapshotTargetEndVersion().getOrThrow(tr), snapshotTargetEndVersion) + && store(config.snapshotIntervalSeconds().getOrThrow(tr), snapshotInterval) + && store(tr->getReadVersion(), recentReadVersion)); + + statusText += format("Snapshot interval is %lld seconds. ", snapshotInterval); + if(backupState == BackupAgentBase::STATE_DIFFERENTIAL) + statusText += format("Current snapshot progress target is %3.2f%%\n", 100.0 * (recentReadVersion - snapshotBeginVersion) / (snapshotTargetEndVersion - snapshotBeginVersion)) ; + else + statusText += "The initial snapshot is still running.\n"; + } } // Append the errors, if requested diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index fcc9b9dc5c..caf34f2330 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -97,6 +97,7 @@ ClientKnobs::ClientKnobs(bool randomize) { init( BACKUP_RANGE_TIMEOUT, TASKBUCKET_TIMEOUT_VERSIONS/CORE_VERSIONSPERSECOND/2.0 ); init( BACKUP_RANGE_MINWAIT, std::max(1.0, BACKUP_RANGE_TIMEOUT/2.0)); init( BACKUP_SNAPSHOT_DISPATCH_INTERVAL_SEC, 60 * 60 ); // 1 hour + init( BACKUP_DEFAULT_SNAPSHOT_INTERVAL_SEC, 3600 * 24 * 10); // 10 days init( BACKUP_SHARD_TASK_LIMIT, 1000 ); if( randomize && BUGGIFY ) BACKUP_SHARD_TASK_LIMIT = 4; init( BACKUP_AGGREGATE_POLL_RATE_UPDATE_INTERVAL, 60); init( BACKUP_AGGREGATE_POLL_RATE, 2.0 ); // polls per second target for all agents on the cluster diff --git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index bf552ba480..7d2a70e734 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h @@ -99,6 +99,7 @@ public: double BACKUP_RANGE_TIMEOUT; double BACKUP_RANGE_MINWAIT; int BACKUP_SNAPSHOT_DISPATCH_INTERVAL_SEC; + int BACKUP_DEFAULT_SNAPSHOT_INTERVAL_SEC; int BACKUP_SHARD_TASK_LIMIT; double BACKUP_AGGREGATE_POLL_RATE; double 
BACKUP_AGGREGATE_POLL_RATE_UPDATE_INTERVAL; diff --git a/flow/genericactors.actor.h b/flow/genericactors.actor.h index bdf30d6ee6..7ab0ff1eff 100644 --- a/flow/genericactors.actor.h +++ b/flow/genericactors.actor.h @@ -279,6 +279,11 @@ Future<Void> holdWhileVoid(X object, Future<T> what) return Void(); } +template<class T> +Future<Void> store(Future<T> what, T &out) { + return map(what, [&out](T const &v) { out = v; return Void(); }); +} + //Waits for a future to be ready, and then applies an asynchronous function to it. ACTOR template<class T, class F, class U = decltype( fake<F>()(fake<T>()).getValue() )> Future<U> mapAsync(Future<T> what, F actorFunc)