From f5e8345496389bade3e3ae4cca0d22cc7c68d77c Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Mon, 27 Apr 2020 22:07:45 -0700 Subject: [PATCH] FastRestoreAgent:Use atomicParallelRestore to kick off restore Replace the handcrafted version with atomicParallelRestore actor which is simulation tested --- fdbbackup/backup.actor.cpp | 127 +++++----------------------- fdbclient/BackupAgent.actor.h | 4 - fdbclient/FileBackupAgent.actor.cpp | 23 +++++ 3 files changed, 45 insertions(+), 109 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 7eef8ebc15..e83073bf1f 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -2192,8 +2192,7 @@ ACTOR Future runRestore(Database db, std::string originalClusterFile, std: // Fast restore agent that kicks off the restore: send restore requests to restore workers. ACTOR Future runFastRestoreAgent(Database db, std::string tagName, std::string container, Standalone> ranges, Version dbVersion, - bool performRestore, bool verbose, bool waitForDone, std::string addPrefix, - std::string removePrefix) { + bool performRestore, bool verbose, bool waitForDone) { try { state FileBackupAgent backupAgent; state Version restoreVersion = invalidVersion; @@ -2219,9 +2218,26 @@ ACTOR Future runFastRestoreAgent(Database db, std::string tagName, std::st dbVersion = desc.maxRestorableVersion.get(); TraceEvent("FastRestoreAgent").detail("TargetRestoreVersion", dbVersion); } - Version _restoreVersion = wait(fastRestore(db, KeyRef(tagName), KeyRef(container), waitForDone, dbVersion, - verbose, range, KeyRef(addPrefix), KeyRef(removePrefix))); - restoreVersion = _restoreVersion; + state UID randomUID = deterministicRandom()->randomUniqueID(); + TraceEvent("FastRestoreAgent") + .detail("SubmitRestoreRequests", ranges.size()) + .detail("RestoreUID", randomUID); + wait(backupAgent.submitParallelRestore(db, KeyRef(tagName), ranges, KeyRef(container), dbVersion, true, + randomUID)); + if (waitForDone) { + // Wait 
for parallel restore to finish and unlock DB after that + TraceEvent("FastRestoreAgent").detail("BackupAndParallelRestore", "WaitForRestoreToFinish"); + wait(backupAgent.parallelRestoreFinish(db, randomUID)); + TraceEvent("FastRestoreAgent").detail("BackupAndParallelRestore", "RestoreFinished"); + } else { + TraceEvent("FastRestoreAgent") + .detail("RestoreUID", randomUID) + .detail("OperationGuide", "Manually unlock DB when restore finishes"); + printf("WARNING: DB will be in locked state after restore. Need UID:%s to unlock DB\n", + randomUID.toString().c_str()); // %s requires a C string, not a std::string object + } + + restoreVersion = dbVersion; } else { state Reference bc = IBackupContainer::openContainer(container); state BackupDescription description = wait(bc->describeBackup()); @@ -3740,7 +3756,7 @@ int main(int argc, char* argv[]) { switch (restoreType) { case RESTORE_START: f = stopAfter(runFastRestoreAgent(db, tagName, restoreContainer, backupKeys, restoreVersion, !dryRun, - !quietDisplay, waitForDone, addPrefix, removePrefix)); + !quietDisplay, waitForDone)); break; case RESTORE_WAIT: printf("[TODO][ERROR] FastRestore does not support RESTORE_WAIT yet!\n"); @@ -3887,102 +3903,3 @@ int main(int argc, char* argv[]) { flushAndExit(status); } - -//------Restore Agent: Kick off the restore by sending the restore requests -ACTOR static Future waitFastRestore(Database cx, Key tagName, bool verbose) { - // We should wait on all restore to finish before proceeds - TraceEvent("FastRestore").detail("Progress", "WaitForRestoreToFinish"); - state ReadYourWritesTransaction tr(cx); - state Future fRestoreRequestDone; - state bool restoreRequestDone = false; - - loop { - try { - tr.reset(); - tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); - // In case restoreRequestDoneKey is already set before we set watch on it - Optional restoreRequestDoneKeyValue = wait(tr.get(restoreRequestDoneKey)); - if 
(restoreRequestDoneKeyValue.present()) { - restoreRequestDone = true; - tr.clear(restoreRequestDoneKey); - wait(tr.commit()); - break; - } else if (!restoreRequestDone) { - fRestoreRequestDone = tr.watch(restoreRequestDoneKey); - wait(tr.commit()); - wait(fRestoreRequestDone); - } else { - break; - } - } catch (Error& e) { - wait(tr.onError(e)); - } - } - - TraceEvent("FastRestore").detail("Progress", "RestoreFinished"); - - return FileBackupAgent::ERestoreState::COMPLETED; -} - -ACTOR static Future _fastRestore(Database cx, Key tagName, Key url, bool waitForComplete, - Version targetVersion, bool verbose, KeyRange range, Key addPrefix, - Key removePrefix) { - state Reference bc = IBackupContainer::openContainer(url.toString()); - state BackupDescription desc = wait(bc->describeBackup()); - wait(desc.resolveVersionTimes(cx)); - - if (targetVersion == invalidVersion && desc.maxRestorableVersion.present()) - targetVersion = desc.maxRestorableVersion.get(); - - Optional restoreSet = wait(bc->getRestoreSet(targetVersion)); - TraceEvent("FastRestore").detail("BackupDesc", desc.toString()).detail("TargetVersion", targetVersion); - - if (!restoreSet.present()) { - TraceEvent(SevWarn, "FileBackupAgentRestoreNotPossible") - .detail("BackupContainer", bc->getURL()) - .detail("TargetVersion", targetVersion); - throw restore_invalid_version(); - } - - // NOTE: The restore agent makes sure we only support 1 restore range for each restore request for now! - // The simulation test did test restoring multiple restore ranges in one restore request though. 
- state Reference tr(new ReadYourWritesTransaction(cx)); - state int restoreIndex = 0; - loop { - try { - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - Standalone restoreTag(tagName.toString() + "_" + std::to_string(restoreIndex)); - bool locked = true; - struct RestoreRequest restoreRequest(restoreIndex, restoreTag, KeyRef(bc->getURL()), true, targetVersion, - true, range, Key(), Key(), locked, - deterministicRandom()->randomUniqueID()); - tr->set(restoreRequestKeyFor(restoreRequest.index), restoreRequestValue(restoreRequest)); - // backupRanges.size = 1 because we only support restoring 1 range in real mode for now - tr->set(restoreRequestTriggerKey, restoreRequestTriggerValue(deterministicRandom()->randomUniqueID(),1)); - wait(tr->commit()); // Trigger fast restore - break; - } catch (Error& e) { - if (e.code() != error_code_restore_duplicate_tag) { - wait(tr->onError(e)); - } - } - } - - if (waitForComplete) { - FileBackupAgent::ERestoreState finalState = wait(waitFastRestore(cx, tagName, verbose)); - if (finalState != FileBackupAgent::ERestoreState::COMPLETED) throw restore_error(); - } - - return targetVersion; -} - -ACTOR Future fastRestore(Database cx, Standalone tagName, Standalone url, - bool waitForComplete, long targetVersion, bool verbose, Standalone range, - Standalone addPrefix, Standalone removePrefix) { - Version result = - wait(_fastRestore(cx, tagName, url, waitForComplete, targetVersion, verbose, range, addPrefix, removePrefix)); - return result; -} diff --git a/fdbclient/BackupAgent.actor.h b/fdbclient/BackupAgent.actor.h index f728bcd488..4699aa480b 100644 --- a/fdbclient/BackupAgent.actor.h +++ b/fdbclient/BackupAgent.actor.h @@ -893,10 +893,6 @@ public: } }; -ACTOR Future fastRestore(Database cx, Standalone tagName, Standalone url, - bool waitForComplete, long targetVersion, bool verbose, Standalone range, - Standalone addPrefix, Standalone removePrefix); - // Helper class for 
reading restore data from a buffer and throwing the right errors. struct StringRefReader { StringRefReader(StringRef s = StringRef(), Error e = Error()) : rptr(s.begin()), end(s.end()), failure_error(e) {} diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index 59f1837374..b4cefe9eb6 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -3628,6 +3628,29 @@ public: ACTOR static Future submitParallelRestore(Database cx, Key backupTag, Standalone> backupRanges, KeyRef bcUrl, Version targetVersion, bool lockDB, UID randomUID) { + // Sanity check backup is valid; resolve invalidVersion first so the check uses the real target + state Reference bc = IBackupContainer::openContainer(bcUrl.toString()); + state BackupDescription desc = wait(bc->describeBackup()); + wait(desc.resolveVersionTimes(cx)); + + if (targetVersion == invalidVersion && desc.maxRestorableVersion.present()) { + targetVersion = desc.maxRestorableVersion.get(); + TraceEvent(SevWarn, "FastRestoreSubmitRestoreRequestWithInvalidTargetVersion") + .detail("OverrideTargetVersion", targetVersion); + } + + Optional restoreSet = wait(bc->getRestoreSet(targetVersion)); + + if (!restoreSet.present()) { + TraceEvent(SevWarn, "FileBackupAgentRestoreNotPossible") + .detail("BackupContainer", bc->getURL()) + .detail("TargetVersion", targetVersion); + throw restore_invalid_version(); + } + TraceEvent("FastRestoreSubmitRestoreRequest") + .detail("BackupDesc", desc.toString()) + .detail("TargetVersion", targetVersion); + state Reference tr(new ReadYourWritesTransaction(cx)); state int restoreIndex = 0; state int numTries = 0;