From ec9410492dd3ccd13ebc94f3d37528ef9ab6b8ed Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Fri, 23 Nov 2018 05:23:56 -0800 Subject: [PATCH 01/55] Changed backup folder scheme to use a much smaller number of unique folders for kv range files, which will speed up list and expire operations. The old scheme is still readable but will no longer be written except in simulation in order to test backward compatibility. --- fdbclient/BackupAgent.h | 10 +++ fdbclient/BackupContainer.actor.cpp | 119 +++++++++++++++++++++++----- fdbclient/BackupContainer.h | 2 +- fdbclient/FileBackupAgent.actor.cpp | 25 +++++- flow/genericactors.actor.h | 10 +++ 5 files changed, 142 insertions(+), 24 deletions(-) diff --git a/fdbclient/BackupAgent.h b/fdbclient/BackupAgent.h index 247fc98799..caff559b2e 100644 --- a/fdbclient/BackupAgent.h +++ b/fdbclient/BackupAgent.h @@ -615,6 +615,15 @@ public: return configSpace.pack(LiteralStringRef(__FUNCTION__)); } + // Number of kv range files that were both committed to persistent storage AND inserted into + // the snapshotRangeFileMap. Note that since insertions could replace 1 or more existing + // map entries this is not necessarily the number of entries currently in the map. + // This value exists to help with sizing of kv range folders for BackupContainers that + // require it. + KeyBackedBinaryValue snapshotRangeFileCount() { + return configSpace.pack(LiteralStringRef(__FUNCTION__)); + } + // Coalesced set of ranges already dispatched for writing. 
typedef KeyBackedMap RangeDispatchMapT; RangeDispatchMapT snapshotRangeDispatchMap() { @@ -671,6 +680,7 @@ public: copy.snapshotBeginVersion().set(tr, beginVersion.get()); copy.snapshotTargetEndVersion().set(tr, endVersion); + copy.snapshotRangeFileCount().set(tr, 0); return Void(); }); diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 3220e6bbc0..76add48e1e 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -143,16 +143,36 @@ std::string BackupDescription::toString() const { /* BackupContainerFileSystem implements a backup container which stores files in a nested folder structure. * Inheritors must only defined methods for writing, reading, deleting, sizing, and listing files. * - * BackupInfo is stored as a JSON document at - * /info - * Snapshots are stored as JSON at file paths like - * /snapshots/snapshot,startVersion,endVersion,totalBytes - * Log and Range data files at file paths like - * /logs/.../log,startVersion,endVersion,blockSize - * /ranges/.../range,version,uid,blockSize + * Snapshot manifests (a complete set of files constituting a database snapshot for the backup's target ranges) + * are stored as JSON files at paths like + * /snapshots/snapshot,minVersion,maxVersion,totalBytes + * + * Key range files for snapshots are stored at paths like + * /kvranges/snapshot,startVersion/N/range,version,uid,blockSize + * where startVersion is the version at which the backup snapshot execution began and N is a number + * that is increased as key range files are generated over time (at varying rates) such that there + * are around 5,000 key range files in each folder. * - * Where ... 
is a multi level path which sorts lexically into version order and targets 10,000 or less - * entries in each folder (though a full speed snapshot could exceed this count at the innermost folder level) + * Note that startVersion will NOT correspond to the minVersion of a snapshot manifest because + * snapshot manifest min/max versions are based on the actual contained data and the first data + * file written will be after the start version of the snapshot's execution. + * + * Log files are at file paths like + * /logs/.../log,startVersion,endVersion,blockSize + * where ... is a multi level path which sorts lexically into version order and results in approximately 1 + * unique folder per day containing about 5,000 files. + * + * BACKWARD COMPATIBILITY + * + * Prior to FDB version 6.0.16, key range files were stored using a different folder scheme. Newer versions + * still support this scheme for all restore and backup management operations but key range files generated + * by backup using version 6.0.16 or later use the scheme described above. + * + * The old format stored key range files at paths like + * /ranges/.../range,version,uid,blockSize + * where ... is a multi level path which sorts lexically into version order and results in up to approximately + * 900 unique folders per day. The number of files per folder depends on the configured snapshot rate and + * database size and will vary from 1 to around 5,000. */ class BackupContainerFileSystem : public IBackupContainer { public: @@ -166,8 +186,8 @@ public: virtual Future create() = 0; // Get a list of fileNames and their sizes in the container under the given path - // The implementation can (but does not have to) use the folder path filter to avoid traversing - // specific subpaths. + // Although not required, an implementation can avoid traversing unwanted subfolders + // by calling folderPathFilter(absoluteFolderPath) and checking for a false return value.
typedef std::vector> FilesAndSizesT; virtual Future listFiles(std::string path = "", std::function folderPathFilter = nullptr) = 0; @@ -207,10 +227,24 @@ public: } // The innermost folder covers 100 seconds (1e8 versions) During a full speed backup it is possible though very unlikely write about 10,000 snapshot range files during that time. - static std::string rangeVersionFolderString(Version v) { + static std::string old_rangeVersionFolderString(Version v) { return format("ranges/%s/", versionFolderString(v, 8).c_str()); } + // Get the root folder for a snapshot's data based on its begin version + static std::string snapshotFolderString(Version snapshotBeginVersion) { + return format("kvranges/snapshot.%018lld", snapshotBeginVersion); + } + + // Extract the snapshot begin version from a path + static Version extractSnapshotBeginVersion(std::string path) { + Version snapshotBeginVersion; + if(sscanf(path.c_str(), "kvranges/snapshot.%018lld", &snapshotBeginVersion) == 1) { + return snapshotBeginVersion; + } + return invalidVersion; + } + // The innermost folder covers 100,000 seconds (1e11 versions) which is 5,000 mutation log files at current settings. 
static std::string logVersionFolderString(Version v) { return format("logs/%s/", versionFolderString(v, 11).c_str()); @@ -220,8 +254,15 @@ public: return writeFile(logVersionFolderString(beginVersion) + format("log,%lld,%lld,%s,%d", beginVersion, endVersion, g_random->randomUniqueID().toString().c_str(), blockSize)); } - Future> writeRangeFile(Version version, int blockSize) { - return writeFile(rangeVersionFolderString(version) + format("range,%lld,%s,%d", version, g_random->randomUniqueID().toString().c_str(), blockSize)); + Future> writeRangeFile(Version snapshotBeginVersion, int snapshotFileCount, Version fileVersion, int blockSize) { + std::string fileName = format("range,%lld,%s,%d", fileVersion, g_random->randomUniqueID().toString().c_str(), blockSize); + + // In order to test backward compatibility in simulation, sometimes write to the old path format + if(BUGGIFY) { + return writeFile(old_rangeVersionFolderString(fileVersion) + fileName); + } + + return writeFile(snapshotFolderString(snapshotBeginVersion) + format("/%d/", snapshotFileCount / (BUGGIFY ? 1 : 5000)) + fileName); } static bool pathToRangeFile(RangeFile &out, std::string path, int64_t size) { @@ -265,6 +306,7 @@ public: // TODO: Do this more efficiently, as the range file list for a snapshot could potentially be hundreds of megabytes. ACTOR static Future> readKeyspaceSnapshot_impl(Reference bc, KeyspaceSnapshotFile snapshot) { // Read the range file list for the specified version range, and then index them by fileName. + // This is so we can verify that each of the files listed in the manifest file are also in the container at this time. 
std::vector files = wait(bc->listRangeFiles(snapshot.beginVersion, snapshot.endVersion)); state std::map rangeIndex; for(auto &f : files) @@ -386,11 +428,12 @@ public: }); } - // List range files, in sorted version order, which contain data at or between beginVersion and endVersion - Future> listRangeFiles(Version beginVersion = 0, Version endVersion = std::numeric_limits::max()) { + // List range files which contain data at or between beginVersion and endVersion + // NOTE: This reads the range file folder schema from FDB 6.0.15 and earlier and is provided for backward compatibility + Future> old_listRangeFiles(Version beginVersion, Version endVersion) { // Get the cleaned (without slashes) first and last folders that could contain relevant results. - std::string firstPath = cleanFolderString(rangeVersionFolderString(beginVersion)); - std::string lastPath = cleanFolderString(rangeVersionFolderString(endVersion)); + std::string firstPath = cleanFolderString(old_rangeVersionFolderString(beginVersion)); + std::string lastPath = cleanFolderString(old_rangeVersionFolderString(endVersion)); std::function pathFilter = [=](const std::string &folderPath) { // Remove slashes in the given folder path so that the '/' positions in the version folder string do not matter @@ -407,6 +450,38 @@ public: if(pathToRangeFile(rf, f.first, f.second) && rf.version >= beginVersion && rf.version <= endVersion) results.push_back(rf); } + return results; + }); + } + + // List range files which contain data at or between beginVersion and endVersion + // Note: The contents of each top level snapshot.N folder do not necessarily constitute a valid snapshot + // and therefore listing files is not how RestoreSets are obtained. 
+ // Note: Snapshots partially written using FDB versions prior to 6.0.16 will have some range files stored + // using the old folder scheme read by old_listRangeFiles + Future> listRangeFiles(Version beginVersion, Version endVersion) { + // Until the old folder scheme is no longer supported, read files stored using old folder scheme + Future> oldFiles = old_listRangeFiles(beginVersion, endVersion); + + // Define filter function (for listFiles() implementations that use it) to reject any folder + // starting after endVersion + std::function pathFilter = [=](std::string const &path) { + return extractSnapshotBeginVersion(path) > endVersion; + }; + + Future> newFiles = map(listFiles("kvranges/", pathFilter), [=](const FilesAndSizesT &files) { + std::vector results; + RangeFile rf; + for(auto &f : files) { + if(pathToRangeFile(rf, f.first, f.second) && rf.version >= beginVersion && rf.version <= endVersion) + results.push_back(rf); + } + return results; + }); + + return map(success(oldFiles) && success(newFiles), [=](Void _) { + std::vector results = std::move(newFiles.get()); + results.insert(results.end(), std::make_move_iterator(oldFiles.get().begin()), std::make_move_iterator(oldFiles.get().end())); std::sort(results.begin(), results.end()); return results; }); @@ -1403,9 +1478,11 @@ ACTOR Future testBackupContainer(std::string url) { state Reference log1 = wait(c->writeLogFile(100 + versionShift, 150 + versionShift, 10)); state Reference log2 = wait(c->writeLogFile(150 + versionShift, 300 + versionShift, 10)); - state Reference range1 = wait(c->writeRangeFile(160 + versionShift, 10)); - state Reference range2 = wait(c->writeRangeFile(300 + versionShift, 10)); - state Reference range3 = wait(c->writeRangeFile(310 + versionShift, 10)); + state Version snapshotBeginVersion1 = 160 + versionShift; + state Version snapshotBeginVersion2 = 310 + versionShift; + state Reference range1 = wait(c->writeRangeFile(snapshotBeginVersion1, 0, snapshotBeginVersion1, 10)); + state 
Reference range2 = wait(c->writeRangeFile(snapshotBeginVersion1, 1, 300 + versionShift, 10)); + state Reference range3 = wait(c->writeRangeFile(snapshotBeginVersion2, 0, snapshotBeginVersion2, 10)); Void _ = wait( writeAndVerifyFile(c, log1, 0) diff --git a/fdbclient/BackupContainer.h b/fdbclient/BackupContainer.h index 4a0d659528..58bc536d09 100644 --- a/fdbclient/BackupContainer.h +++ b/fdbclient/BackupContainer.h @@ -156,7 +156,7 @@ public: // Open a log file or range file for writing virtual Future> writeLogFile(Version beginVersion, Version endVersion, int blockSize) = 0; - virtual Future> writeRangeFile(Version version, int blockSize) = 0; + virtual Future> writeRangeFile(Version snapshotBeginVersion, int snapshotFileCount, Version fileVersion, int blockSize) = 0; // Write a KeyspaceSnapshotFile of range file names representing a full non overlapping // snapshot of the key ranges this backup is targeting. diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index abcf1c1703..cfff72a833 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -1002,6 +1002,7 @@ namespace fileBackup { // Update the range bytes written in the backup config backup.rangeBytesWritten().atomicOp(tr, file->size(), MutationRef::AddValue); + backup.snapshotRangeFileCount().atomicOp(tr, 1, MutationRef::AddValue); // See if there is already a file for this key which has an earlier begin, update the map if not. Optional s = wait(backup.snapshotRangeFileMap().get(tr, range.end)); @@ -1127,11 +1128,31 @@ namespace fileBackup { if(done) return Void(); - // Start writing a new file + // Start writing a new file after verifying this task should keep running as of a new read version (which must be >= outVersion) outVersion = values.second; // block size must be at least large enough for 3 max size keys and 2 max size values + overhead so 250k conservatively. state int blockSize = BUGGIFY ? 
g_random->randomInt(250e3, 4e6) : CLIENT_KNOBS->BACKUP_RANGEFILE_BLOCK_SIZE; - Reference f = wait(bc->writeRangeFile(outVersion, blockSize)); + state Version snapshotBeginVersion; + state int64_t snapshotRangeFileCount; + + state Reference tr(new ReadYourWritesTransaction(cx)); + loop { + try { + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::LOCK_AWARE); + + Void _ = wait(taskBucket->keepRunning(tr, task) + && storeOrThrow(backup.snapshotBeginVersion().get(tr), snapshotBeginVersion) + && storeOrThrow(backup.snapshotRangeFileCount().get(tr), snapshotRangeFileCount) + ); + + break; + } catch(Error &e) { + Void _ = wait(tr->onError(e)); + } + } + + Reference f = wait(bc->writeRangeFile(snapshotBeginVersion, snapshotRangeFileCount, outVersion, blockSize)); outFile = f; // Initialize range file writer and write begin key diff --git a/flow/genericactors.actor.h b/flow/genericactors.actor.h index 5c83edbdae..88f57411d6 100644 --- a/flow/genericactors.actor.h +++ b/flow/genericactors.actor.h @@ -296,6 +296,16 @@ Future store(Future what, T &out) { return map(what, [&out](T const &v) { out = v; return Void(); }); } +template +Future storeOrThrow(Future> what, T &out, Error e = key_not_found()) { + return map(what, [&out,e](Optional const &o) { + if(!o.present()) + throw e; + out = o.get(); + return Void(); + }); +} + //Waits for a future to be ready, and then applies an asynchronous function to it. ACTOR template()(fake()).getValue() )> Future mapAsync(Future what, F actorFunc) From aa648daabf1422a8831a4ccbb5b7952707603d95 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Fri, 23 Nov 2018 12:49:10 -0800 Subject: [PATCH 02/55] Compile fix for linux, can't make a move iterator from a const container reference. 
--- fdbclient/BackupContainer.actor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 76add48e1e..a9fbb4bda6 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -481,7 +481,8 @@ public: return map(success(oldFiles) && success(newFiles), [=](Void _) { std::vector results = std::move(newFiles.get()); - results.insert(results.end(), std::make_move_iterator(oldFiles.get().begin()), std::make_move_iterator(oldFiles.get().end())); + std::vector oldResults = std::move(oldFiles.get()); + results.insert(results.end(), std::make_move_iterator(oldResults.begin()), std::make_move_iterator(oldResults.end())); std::sort(results.begin(), results.end()); return results; }); From 0610a19e4d6e879b22c12f86137544886437e7bc Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Sat, 24 Nov 2018 17:24:54 -0800 Subject: [PATCH 03/55] Rewrote backup container unit test to use randomness to cover a wider variety of data patterns and edge cases. 
--- fdbclient/BackupContainer.actor.cpp | 170 +++++++++++++++++----------- 1 file changed, 106 insertions(+), 64 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index a9fbb4bda6..e42bd94c04 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -258,7 +258,7 @@ public: std::string fileName = format("range,%lld,%s,%d", fileVersion, g_random->randomUniqueID().toString().c_str(), blockSize); // In order to test backward compatibility in simulation, sometimes write to the old path format - if(BUGGIFY) { + if(g_network->isSimulated() && g_random->coinflip()) { return writeFile(old_rangeVersionFolderString(fileVersion) + fileName); } @@ -1438,6 +1438,15 @@ ACTOR Future> timeKeeperEpochsFromVersion(Version v, Reference return found.first + (v - found.second) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND; } +int chooseFileSize(std::vector &sizes) { + int size = 1000; + if(!sizes.empty()) { + size = sizes.back(); + sizes.pop_back(); + } + return size; +} + ACTOR Future writeAndVerifyFile(Reference c, Reference f, int size) { state Standalone content; if(size > 0) { @@ -1460,6 +1469,12 @@ ACTOR Future writeAndVerifyFile(Reference c, ReferencerandomInt64(1, CLIENT_KNOBS->CORE_VERSIONSPERSECOND); + return v + increment; +} + ACTOR Future testBackupContainer(std::string url) { printf("BackupContainerTest URL %s\n", url.c_str()); @@ -1475,88 +1490,115 @@ ACTOR Future testBackupContainer(std::string url) { Void _ = wait(c->create()); - state int64_t versionShift = g_random->randomInt64(0, std::numeric_limits::max() - 500); + state std::vector> writes; + state std::map> snapshots; + state std::map snapshotSizes; + state int nRangeFiles = 0; + state std::map logs; + state Version v = g_random->randomInt64(0, std::numeric_limits::max() / 2); - state Reference log1 = wait(c->writeLogFile(100 + versionShift, 150 + versionShift, 10)); - state Reference log2 = wait(c->writeLogFile(150 + versionShift, 300 
+ versionShift, 10)); - state Version snapshotBeginVersion1 = 160 + versionShift; - state Version snapshotBeginVersion2 = 310 + versionShift; - state Reference range1 = wait(c->writeRangeFile(snapshotBeginVersion1, 0, snapshotBeginVersion1, 10)); - state Reference range2 = wait(c->writeRangeFile(snapshotBeginVersion1, 1, 300 + versionShift, 10)); - state Reference range3 = wait(c->writeRangeFile(snapshotBeginVersion2, 0, snapshotBeginVersion2, 10)); + // List of sizes to use to test edge cases on underlying file implementations + state std::vector fileSizes = {0, 10000000, 5000005}; - Void _ = wait( - writeAndVerifyFile(c, log1, 0) - && writeAndVerifyFile(c, log2, g_random->randomInt(0, 10000000)) - && writeAndVerifyFile(c, range1, g_random->randomInt(0, 1000)) - && writeAndVerifyFile(c, range2, g_random->randomInt(0, 100000)) - && writeAndVerifyFile(c, range3, g_random->randomInt(0, 3000000)) - ); + loop { + state Version logStart = v; + state int kvfiles = g_random->randomInt(0, 3); - Void _ = wait( - c->writeKeyspaceSnapshotFile({range1->getFileName(), range2->getFileName()}, range1->size() + range2->size()) - && c->writeKeyspaceSnapshotFile({range3->getFileName()}, range3->size()) - ); + while(kvfiles > 0) { + if(snapshots.empty()) { + snapshots[v] = {}; + snapshotSizes[v] = 0; + if(g_random->coinflip()) { + v = nextVersion(v); + } + } + Reference range = wait(c->writeRangeFile(snapshots.rbegin()->first, 0, v, 10)); + ++nRangeFiles; + v = nextVersion(v); + snapshots.rbegin()->second.push_back(range->getFileName()); - printf("Checking file list dump\n"); - FullBackupListing listing = wait(c->dumpFileList()); - ASSERT(listing.logs.size() == 2); - ASSERT(listing.ranges.size() == 3); - ASSERT(listing.snapshots.size() == 2); + int size = chooseFileSize(fileSizes); + snapshotSizes.rbegin()->second += size; + writes.push_back(writeAndVerifyFile(c, range, size)); + + if(g_random->random01() < .2) { + 
writes.push_back(c->writeKeyspaceSnapshotFile(snapshots.rbegin()->second, snapshotSizes.rbegin()->second)); + snapshots[v] = {}; + snapshotSizes[v] = 0; + break; + } + + --kvfiles; + } + + if(logStart == v || g_random->coinflip()) { + v = nextVersion(v); + } + state Reference log = wait(c->writeLogFile(logStart, v, 10)); + logs[logStart] = log->getFileName(); + int size = chooseFileSize(fileSizes); + writes.push_back(writeAndVerifyFile(c, log, size)); + + // Randomly stop after a snapshot has finished and all manually seeded file sizes have been used. + if(fileSizes.empty() && !snapshots.empty() && snapshots.rbegin()->second.empty() && g_random->random01() < .2) { + snapshots.erase(snapshots.rbegin()->first); + break; + } + } + + Void _ = wait(waitForAll(writes)); + + state FullBackupListing listing = wait(c->dumpFileList()); + ASSERT(listing.ranges.size() == nRangeFiles); + ASSERT(listing.logs.size() == logs.size()); + ASSERT(listing.snapshots.size() == snapshots.size()); state BackupDescription desc = wait(c->describeBackup()); - printf("Backup Description 1\n%s", desc.toString().c_str()); + printf("\n%s\n", desc.toString().c_str()); - ASSERT(desc.maxRestorableVersion.present()); - Optional rest = wait(c->getRestoreSet(desc.maxRestorableVersion.get())); - ASSERT(rest.present()); - ASSERT(rest.get().logs.size() == 0); - ASSERT(rest.get().ranges.size() == 1); + // Do a series of expirations and verify resulting state + state int i = 0; + for(; i < listing.snapshots.size(); ++i) { + // Ensure we can still restore to the latest version + Optional rest = wait(c->getRestoreSet(desc.maxRestorableVersion.get())); + ASSERT(rest.present()); - Optional rest = wait(c->getRestoreSet(150 + versionShift)); - ASSERT(!rest.present()); + // Ensure we can restore to the end version of snapshot i + Optional rest = wait(c->getRestoreSet(listing.snapshots[i].endVersion)); + ASSERT(rest.present()); - Optional rest = wait(c->getRestoreSet(300 + versionShift)); - ASSERT(rest.present()); 
- ASSERT(rest.get().logs.size() == 1); - ASSERT(rest.get().ranges.size() == 2); + // Test expiring to the end of this snapshot + state Version expireVersion = listing.snapshots[i].endVersion; - printf("Expire 1\n"); - Void _ = wait(c->expireData(100 + versionShift)); - BackupDescription d = wait(c->describeBackup()); - printf("Backup Description 2\n%s", d.toString().c_str()); - ASSERT(d.minLogBegin == 100 + versionShift); - ASSERT(d.maxRestorableVersion == desc.maxRestorableVersion); + // Expire everything up to but not including the snapshot end version + printf("EXPIRE TO %lld\n", expireVersion); + state Future f = c->expireData(expireVersion); + Void _ = wait(ready(f)); - printf("Expire 2\n"); - Void _ = wait(c->expireData(101 + versionShift)); - BackupDescription d = wait(c->describeBackup()); - printf("Backup Description 3\n%s", d.toString().c_str()); - ASSERT(d.minLogBegin == 100 + versionShift); - ASSERT(d.maxRestorableVersion == desc.maxRestorableVersion); + // If there is an error, it must be backup_cannot_expire and we have to be on the last snapshot + if(f.isError()) { + ASSERT(f.getError().code() == error_code_backup_cannot_expire); + ASSERT(i == listing.snapshots.size() - 1); + Void _ = wait(c->expireData(expireVersion, true)); + } - printf("Expire 3\n"); - Void _ = wait(c->expireData(300 + versionShift)); - BackupDescription d = wait(c->describeBackup()); - printf("Backup Description 4\n%s", d.toString().c_str()); - ASSERT(d.minLogBegin.present()); - ASSERT(d.snapshots.size() == desc.snapshots.size()); - ASSERT(d.maxRestorableVersion == desc.maxRestorableVersion); - - printf("Expire 4\n"); - Void _ = wait(c->expireData(301 + versionShift, true)); - BackupDescription d = wait(c->describeBackup()); - printf("Backup Description 4\n%s", d.toString().c_str()); - ASSERT(d.snapshots.size() == 1); - ASSERT(!d.minLogBegin.present()); + BackupDescription d = wait(c->describeBackup()); + printf("\n%s\n", d.toString().c_str()); + } + printf("DELETING\n"); Void _ 
= wait(c->deleteContainer()); BackupDescription d = wait(c->describeBackup()); - printf("Backup Description 5\n%s", d.toString().c_str()); + printf("\n%s\n", d.toString().c_str()); ASSERT(d.snapshots.size() == 0); ASSERT(!d.minLogBegin.present()); + FullBackupListing empty = wait(c->dumpFileList()); + ASSERT(empty.ranges.size() == 0); + ASSERT(empty.logs.size() == 0); + ASSERT(empty.snapshots.size() == 0); + printf("BackupContainerTest URL=%s PASSED.\n", url.c_str()); return Void(); From 3d68d6b9949fbef410cd06050000a2e6340e3a70 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Sat, 24 Nov 2018 18:41:39 -0800 Subject: [PATCH 04/55] Bug fix, clarified a comment. --- fdbclient/BackupContainer.actor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index e42bd94c04..ab046308aa 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -454,7 +454,7 @@ public: }); } - // List range files which contain data at or between beginVersion and endVersion + // List range files, sorted in version order, which contain data at or between beginVersion and endVersion // Note: The contents of each top level snapshot.N folder do not necessarily constitute a valid snapshot // and therefore listing files is not how RestoreSets are obtained. 
// Note: Snapshots partially written using FDB versions prior to 6.0.16 will have some range files stored @@ -466,7 +466,7 @@ public: // Define filter function (for listFiles() implementations that use it) to reject any folder // starting after endVersion std::function pathFilter = [=](std::string const &path) { - return extractSnapshotBeginVersion(path) > endVersion; + return extractSnapshotBeginVersion(path) <= endVersion; }; Future> newFiles = map(listFiles("kvranges/", pathFilter), [=](const FilesAndSizesT &files) { From 32f434b2eeee3ffb1280bc740625756f2dfad099 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 26 Nov 2018 19:53:46 -0800 Subject: [PATCH 05/55] Bug fix, dns resolution would throw an error if any of the results were IPv6 addresses, which could happen depending on the host networking configuration. --- flow/Net2.actor.cpp | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp index 1b8da44908..2bcaf35625 100644 --- a/flow/Net2.actor.cpp +++ b/flow/Net2.actor.cpp @@ -835,11 +835,12 @@ Future< Reference > Net2::connect( NetworkAddress toAddr, std::stri } ACTOR static Future> resolveTCPEndpoint_impl( Net2 *self, std::string host, std::string service) { - Promise> result; + Promise> promise; + state Future> result = promise.getFuture(); self->tcpResolver.async_resolve(tcp::resolver::query(host, service), [=](const boost::system::error_code &ec, tcp::resolver::iterator iter) { if(ec) { - result.sendError(lookup_failed()); + promise.sendError(lookup_failed()); return; } @@ -847,18 +848,26 @@ ACTOR static Future> resolveTCPEndpoint_impl( Net2 * tcp::resolver::iterator end; while(iter != end) { - // The easiest way to get an ip:port formatted endpoint with this interface is with a string stream because - // endpoint::to_string doesn't exist but operator<< does. 
- std::stringstream s; - s << iter->endpoint(); - addrs.push_back(NetworkAddress::parse(s.str())); + auto endpoint = iter->endpoint(); + // Currently only ipv4 is supported by NetworkAddress + auto addr = endpoint.address(); + if(addr.is_v4()) { + addrs.push_back(NetworkAddress(addr.to_v4().to_ulong(), endpoint.port())); + } ++iter; } - result.send(addrs); + + if(addrs.empty()) { + promise.sendError(lookup_failed()); + } + else { + promise.send(addrs); + } }); - std::vector addresses = wait(result.getFuture()); - return addresses; + Void _ = wait(ready(result)); + + return result.get(); } Future> Net2::resolveTCPEndpoint( std::string host, std::string service) { From b91b26ef75e4c48c00fea3354fd7e2276e0ac10f Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 26 Nov 2018 20:02:03 -0800 Subject: [PATCH 06/55] Attempt at workaround of a rare issue where long running backup processes reach a state where DNS resolution requests always time out but other processes on the same host can still resolve successfully. In case this was somehow caused by a bad boost tcp_resolver state, each request now uses a unique tcp_resolver instance. 
--- flow/Net2.actor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp index 2bcaf35625..0b108ccf38 100644 --- a/flow/Net2.actor.cpp +++ b/flow/Net2.actor.cpp @@ -158,7 +158,6 @@ public: ASIOReactor reactor; INetworkConnections *network; // initially this, but can be changed - tcp::resolver tcpResolver; int64_t tsc_begin, tsc_end; double taskBegin; @@ -478,7 +477,6 @@ Net2::Net2(NetworkAddress localAddress, bool useThreadPool, bool useMetrics) : useThreadPool(useThreadPool), network(this), reactor(this), - tcpResolver(reactor.ios), stopped(false), tasksIssued(0), // Until run() is called, yield() will always yield @@ -835,10 +833,11 @@ Future< Reference > Net2::connect( NetworkAddress toAddr, std::stri } ACTOR static Future> resolveTCPEndpoint_impl( Net2 *self, std::string host, std::string service) { + state tcp::resolver tcpResolver(self->reactor.ios); Promise> promise; state Future> result = promise.getFuture(); - self->tcpResolver.async_resolve(tcp::resolver::query(host, service), [=](const boost::system::error_code &ec, tcp::resolver::iterator iter) { + tcpResolver.async_resolve(tcp::resolver::query(host, service), [=](const boost::system::error_code &ec, tcp::resolver::iterator iter) { if(ec) { promise.sendError(lookup_failed()); return; @@ -866,6 +865,7 @@ ACTOR static Future> resolveTCPEndpoint_impl( Net2 * }); Void _ = wait(ready(result)); + tcpResolver.cancel(); return result.get(); } From b6d9763469d53e7489860c733a897f5cfbc05700 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Tue, 27 Nov 2018 13:10:14 -0800 Subject: [PATCH 07/55] updated release notes for 6.0.16 --- documentation/sphinx/source/release-notes.rst | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 4c1a72da2f..38a9ccd91d 100644 --- a/documentation/sphinx/source/release-notes.rst +++ 
b/documentation/sphinx/source/release-notes.rst @@ -5,6 +5,21 @@ Release Notes 6.0.16 ====== +Performance +----------- + +* Added a new backup folder scheme which results in far fewer kv range folders. `(PR #939) `_ + +Fixes +----- + +* Blobstore REST client attempted to create buckets that already existed. `(PR #923) `_ +* DNS would fail if IPv6 responses were received. `(PR #945) `_ +* Backup expiration would occasionally fail due to an incorrect assert. `(PR #926) `_ + +6.0.15 +====== + Features -------- @@ -68,7 +83,6 @@ Fixes * HTTP client used by backup-to-blobstore now correctly treats response header field names as case insensitive. [6.0.15] `(PR #904) `_ * Blobstore REST client was not following the S3 API in several ways (bucket name, date, and response formats). [6.0.15] `(PR #914) `_ * Data distribution could queue shard movements for restoring replication at a low priority. [6.0.15] `(PR #907) `_ -* Blobstore REST client will no longer attempt to create a bucket that already exists. 
[6.0.16] `(PR #923) `_ Fixes only impacting 6.0.0+ --------------------------- From e948aadba3001829446835686bbd8ec8fa835d27 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Tue, 27 Nov 2018 13:21:37 -0800 Subject: [PATCH 08/55] update installer WIX GUID following release --- packaging/msi/FDBInstaller.wxs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/msi/FDBInstaller.wxs b/packaging/msi/FDBInstaller.wxs index 71db7fcb11..b95c57f8bf 100644 --- a/packaging/msi/FDBInstaller.wxs +++ b/packaging/msi/FDBInstaller.wxs @@ -32,7 +32,7 @@ Date: Tue, 27 Nov 2018 13:23:32 -0800 Subject: [PATCH 09/55] updated downloads for 6.0.16 --- documentation/sphinx/source/downloads.rst | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/documentation/sphinx/source/downloads.rst b/documentation/sphinx/source/downloads.rst index 988b78e7b5..48dba89938 100644 --- a/documentation/sphinx/source/downloads.rst +++ b/documentation/sphinx/source/downloads.rst @@ -10,38 +10,38 @@ macOS The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server. -* `FoundationDB-6.0.15.pkg `_ +* `FoundationDB-6.0.16.pkg `_ Ubuntu ------ The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x. -* `foundationdb-clients-6.0.15-1_amd64.deb `_ -* `foundationdb-server-6.0.15-1_amd64.deb `_ (depends on the clients package) +* `foundationdb-clients-6.0.16-1_amd64.deb `_ +* `foundationdb-server-6.0.16-1_amd64.deb `_ (depends on the clients package) RHEL/CentOS EL6 --------------- The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x. 
-* `foundationdb-clients-6.0.15-1.el6.x86_64.rpm `_ -* `foundationdb-server-6.0.15-1.el6.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-6.0.16-1.el6.x86_64.rpm `_ +* `foundationdb-server-6.0.16-1.el6.x86_64.rpm `_ (depends on the clients package) RHEL/CentOS EL7 --------------- The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x. -* `foundationdb-clients-6.0.15-1.el7.x86_64.rpm `_ -* `foundationdb-server-6.0.15-1.el7.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-6.0.16-1.el7.x86_64.rpm `_ +* `foundationdb-server-6.0.16-1.el7.x86_64.rpm `_ (depends on the clients package) Windows ------- The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server. -* `foundationdb-6.0.15-x64.msi `_ +* `foundationdb-6.0.16-x64.msi `_ API Language Bindings ===================== @@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package: -* `foundationdb-6.0.15.tar.gz `_ +* `foundationdb-6.0.16.tar.gz `_ Ruby 1.9.3/2.0.0+ ----------------- -* `fdb-6.0.15.gem `_ +* `fdb-6.0.16.gem `_ Java 8+ ------- -* `fdb-java-6.0.15.jar `_ -* `fdb-java-6.0.15-javadoc.jar `_ +* `fdb-java-6.0.16.jar `_ +* `fdb-java-6.0.16-javadoc.jar `_ Go 1.1+ ------- From 4a1c1ad860624bf774f25d121c6eb06b38c7fd74 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Tue, 27 Nov 2018 14:29:52 -0800 Subject: [PATCH 10/55] update versions target to 6.0.17 --- versions.target | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.target b/versions.target index 2ac042b444..66ff809c50 100644 --- a/versions.target +++ b/versions.target @@ -1,7 +1,7 @@ - 6.0.16 + 6.0.17 6.0 From 84e07de16175db83b268bd92793db03555df089a Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Tue, 27 Nov 2018 14:29:52 -0800 Subject: [PATCH 11/55] 
update installer WIX GUID following release --- packaging/msi/FDBInstaller.wxs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/msi/FDBInstaller.wxs b/packaging/msi/FDBInstaller.wxs index b95c57f8bf..7f3f5aa7bb 100644 --- a/packaging/msi/FDBInstaller.wxs +++ b/packaging/msi/FDBInstaller.wxs @@ -32,7 +32,7 @@ Date: Fri, 30 Nov 2018 10:55:19 -0800 Subject: [PATCH 12/55] Errors that occur in platform that are the result of IO issues are now raised as io_error rather than platform_error. --- fdbrpc/Platform.cpp | 5 ++-- flow/Error.h | 2 ++ flow/Platform.cpp | 69 ++++++++++++++++++++++++++++++++------------- 3 files changed, 54 insertions(+), 22 deletions(-) diff --git a/fdbrpc/Platform.cpp b/fdbrpc/Platform.cpp index 91db66245d..7fa8f060df 100644 --- a/fdbrpc/Platform.cpp +++ b/fdbrpc/Platform.cpp @@ -101,8 +101,9 @@ int eraseDirectoryRecursive(std::string const& dir) { the directory we're deleting doesn't exist in the first place */ if (error && errno != ENOENT) { - TraceEvent(SevError, "EraseDirectoryRecursiveError").detail("Directory", dir).GetLastError(); - throw platform_error(); + Error e = systemErrorCodeToError(); + TraceEvent(SevError, "EraseDirectoryRecursiveError").detail("Directory", dir).GetLastError().error(e); + throw e; } #else #error Port me! 
diff --git a/flow/Error.h b/flow/Error.h index 588507ab04..707720416a 100644 --- a/flow/Error.h +++ b/flow/Error.h @@ -68,6 +68,8 @@ private: enum Flags { FLAG_INJECTED_FAULT=1 }; }; +Error systemErrorCodeToError(); + #undef ERROR #define ERROR(name, number, description) inline Error name() { return Error( number ); }; enum { error_code_##name = number }; #include "error_definitions.h" diff --git a/flow/Platform.cpp b/flow/Platform.cpp index f3d70646eb..6272394a4d 100644 --- a/flow/Platform.cpp +++ b/flow/Platform.cpp @@ -432,22 +432,40 @@ void getMachineRAMInfo(MachineRAMInfo& memInfo) { #endif } +Error systemErrorCodeToError() { +#if defined(_WIN32) + if(GetLastError() == ERROR_IO_DEVICE) { + return io_error(); + } +#elif defined(__unixish__) + if(errno == EIO || errno == EROFS) { + return io_error(); + } +#else + #error Port me! +#endif + + return platform_error(); +} + void getDiskBytes(std::string const& directory, int64_t& free, int64_t& total) { INJECT_FAULT( platform_error, "getDiskBytes" ); #if defined(__unixish__) #ifdef __linux__ struct statvfs buf; if (statvfs(directory.c_str(), &buf)) { - TraceEvent(SevError, "GetDiskBytesStatvfsError").detail("Directory", directory).GetLastError(); - throw platform_error(); + Error e = systemErrorCodeToError(); + TraceEvent(SevError, "GetDiskBytesStatvfsError").detail("Directory", directory).GetLastError().error(e); + throw e; } uint64_t blockSize = buf.f_frsize; #elif defined(__APPLE__) struct statfs buf; if (statfs(directory.c_str(), &buf)) { - TraceEvent(SevError, "GetDiskBytesStatfsError").detail("Directory", directory).GetLastError(); - throw platform_error(); + Error e = systemErrorCodeToError() + TraceEvent(SevError, "GetDiskBytesStatfsError").detail("Directory", directory).GetLastError().error(e); + throw e; } uint64_t blockSize = buf.f_bsize; @@ -466,7 +484,9 @@ void getDiskBytes(std::string const& directory, int64_t& free, int64_t& total) { ULARGE_INTEGER totalSpace; ULARGE_INTEGER totalFreeSpace; if( 
!GetDiskFreeSpaceEx( fullPath.c_str(), &freeSpace, &totalSpace, &totalFreeSpace ) ) { - TraceEvent(SevError, "DiskFreeError").detail("Path", fullPath).GetLastError(); + Error e = systemErrorCodeToError(); + TraceEvent(SevError, "DiskFreeError").detail("Path", fullPath).GetLastError().error(e); + throw e; } total = std::min( (uint64_t) std::numeric_limits::max(), totalSpace.QuadPart ); free = std::min( (uint64_t) std::numeric_limits::max(), freeSpace.QuadPart ); @@ -814,8 +834,9 @@ void getDiskStatistics(std::string const& directory, uint64_t& currentIOs, uint6 struct statfs buf; if (statfs(directory.c_str(), &buf)) { - TraceEvent(SevError, "GetDiskStatisticsStatfsError").detail("Directory", directory).GetLastError(); - throw platform_error(); + Error e = systemErrorCodeToError(); + TraceEvent(SevError, "GetDiskStatisticsStatfsError").detail("Directory", directory).GetLastError().error(e); + throw e; } const char* dev = strrchr(buf.f_mntfromname, '/'); @@ -1709,8 +1730,9 @@ bool deleteFile( std::string const& filename ) { #else #error Port me! 
#endif - TraceEvent(SevError, "DeleteFile").detail("Filename", filename).GetLastError(); - throw platform_error(); + Error e = systemErrorCodeToError(); + TraceEvent(SevError, "DeleteFile").detail("Filename", filename).GetLastError().error(e); + throw e; static void createdDirectory() { INJECT_FAULT( platform_error, "createDirectory" ); } @@ -1734,8 +1756,9 @@ bool createDirectory( std::string const& directory ) { return createDirectory( directory ); } } - TraceEvent(SevError, "CreateDirectory").detail("Directory", directory).GetLastError(); - throw platform_error(); + Error e = systemErrorCodeToError(); + TraceEvent(SevError, "CreateDirectory").detail("Directory", directory).GetLastError().error(e); + throw e; #elif (defined(__linux__) || defined(__APPLE__)) size_t sep = 0; do { @@ -1744,12 +1767,16 @@ bool createDirectory( std::string const& directory ) { if (errno == EEXIST) continue; - TraceEvent(SevError, "CreateDirectory").detail("Directory", directory).GetLastError(); - if (errno == EACCES) - throw file_not_writable(); - else { - throw platform_error(); + Error e; + if(errno == EACCES) { + e = file_not_writable(); } + else { + e = systemErrorCodeToError(); + } + + TraceEvent(SevError, "CreateDirectory").detail("Directory", directory).GetLastError().error(e); + throw e; } createdDirectory(); } while (sep != std::string::npos && sep != directory.length() - 1); @@ -1768,8 +1795,9 @@ std::string abspath( std::string const& filename ) { #ifdef _WIN32 char nameBuffer[MAX_PATH]; if (!GetFullPathName(filename.c_str(), MAX_PATH, nameBuffer, NULL)) { - TraceEvent(SevError, "AbsolutePathError").detail("Filename", filename).GetLastError(); - throw platform_error(); + Error e = systemErrorCodeToError(); + TraceEvent(SevError, "AbsolutePathError").detail("Filename", filename).GetLastError().error(e); + throw e; } // Not totally obvious from the help whether GetFullPathName canonicalizes slashes, so let's do it...
for(char*x = nameBuffer; *x; x++) @@ -1789,8 +1817,9 @@ std::string abspath( std::string const& filename ) { return joinPath( abspath( "." ), filename ); } } - TraceEvent(SevError, "AbsolutePathError").detail("Filename", filename).GetLastError(); - throw platform_error(); + Error e = systemErrorCodeToError(); + TraceEvent(SevError, "AbsolutePathError").detail("Filename", filename).GetLastError().error(e); + throw e; } return std::string(r); #else From 6e94d7d71aca5e4d1a6a953c61f1d49e446b357b Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Fri, 30 Nov 2018 18:52:24 -0800 Subject: [PATCH 13/55] changing the database configuration now checks that enough processes exist to support the new configuration --- fdbcli/fdbcli.actor.cpp | 12 +++++++++- fdbclient/ManagementAPI.actor.cpp | 39 +++++++++++++++++++++++++++++++ fdbclient/ManagementAPI.h | 1 + 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/fdbcli/fdbcli.actor.cpp b/fdbcli/fdbcli.actor.cpp index 3f025aea9a..61058409af 100644 --- a/fdbcli/fdbcli.actor.cpp +++ b/fdbcli/fdbcli.actor.cpp @@ -1624,6 +1624,11 @@ ACTOR Future configure( Database db, std::vector tokens, Refere printf("Type `configure FORCE *' to configure without this check\n"); ret=false; break; + case ConfigurationResult::NOT_ENOUGH_WORKERS: + printf("ERROR: Not enough processes exist to support the specified configuration\n"); + printf("Type `configure FORCE *' to configure without this check\n"); + ret=false; + break; case ConfigurationResult::SUCCESS: printf("Configuration changed\n"); ret=false; @@ -1730,7 +1735,12 @@ ACTOR Future fileConfigure(Database db, std::string filePath, bool isNewDa break; case ConfigurationResult::REGIONS_CHANGED: printf("ERROR: The region configuration cannot be changed while simultaneously changing usable_regions\n"); - printf("Type `fileconfigure FORCE *' to configure without this check\n"); + printf("Type `fileconfigure FORCE ' to configure without this check\n"); + ret=false; + break; + case 
ConfigurationResult::NOT_ENOUGH_WORKERS: + printf("ERROR: Not enough processes exist to support the specified configuration\n"); + printf("Type `fileconfigure FORCE ' to configure without this check\n"); ret=false; break; case ConfigurationResult::SUCCESS: diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp index f711bf1cc1..86eb89ac09 100644 --- a/fdbclient/ManagementAPI.actor.cpp +++ b/fdbclient/ManagementAPI.actor.cpp @@ -293,6 +293,7 @@ ACTOR Future changeConfig( Database cx, std::map> fConfig = tr.getRange(configKeys, CLIENT_KNOBS->TOO_MANY); + state Future> fWorkers = getWorkers(&tr); Void _ = wait( success(fConfig) || tooLong ); if(!fConfig.isReady()) { @@ -377,6 +378,44 @@ ACTOR Future changeConfig( Database cx, std::map, std::set>> dcId_zoneIds; + for(auto& it : fWorkers.get()) { + if( it.processClass.machineClassFitness(ProcessClass::Storage) <= ProcessClass::WorstFit ) { + dcId_zoneIds[it.locality.dcId()].insert(it.locality.zoneId()); + } + } + for(auto& region : newConfig.regions) { + if(dcId_zoneIds[region.dcId].size() < std::max(newConfig.storageTeamSize, newConfig.tLogReplicationFactor)) { + return ConfigurationResult::NOT_ENOUGH_WORKERS; + } + if(region.satelliteTLogReplicationFactor > 0 && region.priority >= 0) { + int totalSatelliteProcesses = 0; + for(auto& sat : region.satellites) { + totalSatelliteProcesses += dcId_zoneIds[sat.dcId].size(); + } + if(totalSatelliteProcesses < region.satelliteTLogReplicationFactor) { + return ConfigurationResult::NOT_ENOUGH_WORKERS; + } + } + } + } else { + std::set> zoneIds; + for(auto& it : fWorkers.get()) { + if( it.processClass.machineClassFitness(ProcessClass::Storage) <= ProcessClass::WorstFit ) { + zoneIds.insert(it.locality.zoneId()); + } + } + if(zoneIds.size() < std::max(newConfig.storageTeamSize, newConfig.tLogReplicationFactor)) { + return ConfigurationResult::NOT_ENOUGH_WORKERS; + } + } } } diff --git a/fdbclient/ManagementAPI.h b/fdbclient/ManagementAPI.h index 
4bdb35d9f7..ce36d60f88 100644 --- a/fdbclient/ManagementAPI.h +++ b/fdbclient/ManagementAPI.h @@ -54,6 +54,7 @@ public: REGION_NOT_FULLY_REPLICATED, MULTIPLE_ACTIVE_REGIONS, REGIONS_CHANGED, + NOT_ENOUGH_WORKERS, SUCCESS }; }; From e2673b2bbc0e1ff7b66532e64e01a42a52fe0af7 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Sun, 2 Dec 2018 22:03:21 -0800 Subject: [PATCH 14/55] Fixed typo causing mac build failure. --- flow/Platform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/Platform.cpp b/flow/Platform.cpp index 6272394a4d..6ec7cd2f65 100644 --- a/flow/Platform.cpp +++ b/flow/Platform.cpp @@ -463,7 +463,7 @@ void getDiskBytes(std::string const& directory, int64_t& free, int64_t& total) { #elif defined(__APPLE__) struct statfs buf; if (statfs(directory.c_str(), &buf)) { - Error e = systemErrorCodeToError() + Error e = systemErrorCodeToError(); TraceEvent(SevError, "GetDiskBytesStatfsError").detail("Directory", directory).GetLastError().error(e); throw e; } From e974ee367d66a7e62147cba08d436a592540dad2 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Sun, 2 Dec 2018 22:22:24 -0800 Subject: [PATCH 15/55] Bug fix, backups in progress during upgrade to 6.0.16 or later do not have range file count initialized so reading it must default to 0.
--- fdbclient/FileBackupAgent.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 fdbclient/FileBackupAgent.actor.cpp diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp old mode 100644 new mode 100755 index cfff72a833..e5fd4a27a3 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -1143,7 +1143,7 @@ namespace fileBackup { Void _ = wait(taskBucket->keepRunning(tr, task) && storeOrThrow(backup.snapshotBeginVersion().get(tr), snapshotBeginVersion) - && storeOrThrow(backup.snapshotRangeFileCount().get(tr), snapshotRangeFileCount) + && store(backup.snapshotRangeFileCount().getD(tr), snapshotRangeFileCount) ); break; From 993522381819d03005f95836cc70eec8cec7fe0a Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Mon, 3 Dec 2018 16:30:22 -0800 Subject: [PATCH 16/55] updated documentation for 6.0.17 --- documentation/sphinx/source/downloads.rst | 24 +++++++++---------- documentation/sphinx/source/release-notes.rst | 8 +++++++ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/documentation/sphinx/source/downloads.rst b/documentation/sphinx/source/downloads.rst index 48dba89938..611345df8d 100644 --- a/documentation/sphinx/source/downloads.rst +++ b/documentation/sphinx/source/downloads.rst @@ -10,38 +10,38 @@ macOS The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server. -* `FoundationDB-6.0.16.pkg `_ +* `FoundationDB-6.0.17.pkg `_ Ubuntu ------ The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x. 
-* `foundationdb-clients-6.0.16-1_amd64.deb `_ -* `foundationdb-server-6.0.16-1_amd64.deb `_ (depends on the clients package) +* `foundationdb-clients-6.0.17-1_amd64.deb `_ +* `foundationdb-server-6.0.17-1_amd64.deb `_ (depends on the clients package) RHEL/CentOS EL6 --------------- The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x. -* `foundationdb-clients-6.0.16-1.el6.x86_64.rpm `_ -* `foundationdb-server-6.0.16-1.el6.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-6.0.17-1.el6.x86_64.rpm `_ +* `foundationdb-server-6.0.17-1.el6.x86_64.rpm `_ (depends on the clients package) RHEL/CentOS EL7 --------------- The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x. -* `foundationdb-clients-6.0.16-1.el7.x86_64.rpm `_ -* `foundationdb-server-6.0.16-1.el7.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-6.0.17-1.el7.x86_64.rpm `_ +* `foundationdb-server-6.0.17-1.el7.x86_64.rpm `_ (depends on the clients package) Windows ------- The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server. 
-* `foundationdb-6.0.16-x64.msi `_ +* `foundationdb-6.0.17-x64.msi `_ API Language Bindings ===================== @@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package: -* `foundationdb-6.0.16.tar.gz `_ +* `foundationdb-6.0.17.tar.gz `_ Ruby 1.9.3/2.0.0+ ----------------- -* `fdb-6.0.16.gem `_ +* `fdb-6.0.17.gem `_ Java 8+ ------- -* `fdb-java-6.0.16.jar `_ -* `fdb-java-6.0.16-javadoc.jar `_ +* `fdb-java-6.0.17.jar `_ +* `fdb-java-6.0.17-javadoc.jar `_ Go 1.1+ ------- diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 38a9ccd91d..94bf9872eb 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -2,6 +2,14 @@ Release Notes ############# +6.0.17 +====== + +Fixes +----- + +* Existing backups did not make progress when upgraded to 6.0.16. 
`(PR #962) `_ + 6.0.16 ====== From c41ccb687d2efaa075a0e140ec77ca31cc9d9e79 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Mon, 3 Dec 2018 16:32:42 -0800 Subject: [PATCH 17/55] update installer WIX GUID following release --- packaging/msi/FDBInstaller.wxs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/msi/FDBInstaller.wxs b/packaging/msi/FDBInstaller.wxs index 7f3f5aa7bb..64a31bb70a 100644 --- a/packaging/msi/FDBInstaller.wxs +++ b/packaging/msi/FDBInstaller.wxs @@ -32,7 +32,7 @@ Date: Mon, 3 Dec 2018 18:21:35 -0800 Subject: [PATCH 18/55] update versions target to 6.0.18 --- versions.target | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.target b/versions.target index 66ff809c50..195bb478a2 100644 --- a/versions.target +++ b/versions.target @@ -1,7 +1,7 @@ - 6.0.17 + 6.0.18 6.0 From f6c9cf89bfccd4fba211977638295d2ca70f8b96 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Mon, 3 Dec 2018 18:21:35 -0800 Subject: [PATCH 19/55] update installer WIX GUID following release --- packaging/msi/FDBInstaller.wxs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/msi/FDBInstaller.wxs b/packaging/msi/FDBInstaller.wxs index 64a31bb70a..d8278c2e91 100644 --- a/packaging/msi/FDBInstaller.wxs +++ b/packaging/msi/FDBInstaller.wxs @@ -32,7 +32,7 @@ Date: Sun, 16 Dec 2018 00:18:13 -0800 Subject: [PATCH 20/55] Rewrote backup metadata scheme to fix several design flaws involving cancelled or concurrent operations. Most importantly, before an expire deletes any data it marks the end of the range being deleted as 'unreliable' so further reads of the backup will treat versions before that point as having data holes. 
--- fdbclient/BackupContainer.actor.cpp | 175 ++++++++++++++++------------ fdbclient/BackupContainer.h | 9 ++ fdbclient/BlobStore.actor.cpp | 11 +- 3 files changed, 117 insertions(+), 78 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index ab046308aa..b8987592a8 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -123,6 +123,10 @@ std::string BackupDescription::toString() const { info.append(format("SnapshotBytes: %lld\n", snapshotBytes)); + if(expiredEndVersion.present()) + info.append(format("ExpiredEndVersion: %s\n", formatVersion(expiredEndVersion.get()).c_str())); + if(unreliableEndVersion.present()) + info.append(format("UnreliableEndVersion: %s\n", formatVersion(unreliableEndVersion.get()).c_str())); if(minLogBegin.present()) info.append(format("MinLogBeginVersion: %s\n", formatVersion(minLogBegin.get()).c_str())); if(contiguousLogEnd.present()) @@ -518,27 +522,61 @@ public: state BackupDescription desc; desc.url = bc->getURL(); - // This is the range of logs we'll have to list to determine log continuity - state Version scanBegin = 0; - state Version scanEnd = std::numeric_limits::max(); - - // Get range for which we know there are logs, if available - state Optional begin; - state Optional end; + // Get metadata versions + state Optional metaLogBegin; + state Optional metaLogEnd; + state Optional metaExpiredEnd; + state Optional metaUnreliableEnd; + // For a deep scan, do not use the version boundary metadata if(!deepScan) { - Void _ = wait(store(bc->logBeginVersion().get(), begin) && store(bc->logEndVersion().get(), end)); + Void _ = wait(store(bc->logBeginVersion().get(), metaLogBegin) && store(bc->logEndVersion().get(), metaLogEnd) + && store(bc->expiredEndVersion().get(), metaExpiredEnd) && store(bc->unreliableEndVersion().get(), metaUnreliableEnd)); } + TraceEvent("BackupContainerMetadata") + .detail("ExpiredEndVersion", metaExpiredEnd.orDefault(-1)) + 
.detail("UnreliableEndVersion", metaUnreliableEnd.orDefault(-1)) + .detail("LogBeginVersion", metaLogBegin.orDefault(-1)) + .detail("LogEndVersion", metaLogEnd.orDefault(-1)); + + // Don't use metaLogBegin or metaLogEnd if any of the following are true + // - either are missing + // - metaLogEnd <= metaLogBegin + // - metaLogEnd < metaExpiredEnd + // - metaLogEnd < metaUnreliableEnd + if(!metaLogBegin.present() || !metaLogEnd.present() + || metaLogEnd.get() <= metaLogBegin.get() + || metaLogEnd.get() < metaExpiredEnd.orDefault(-1) + || metaLogEnd.get() < metaUnreliableEnd.orDefault(-1) + ) { + TraceEvent(SevWarnAlways, "BackupContainerMetadataInvalid") + .detail("LogBeginVersion", metaLogBegin.orDefault(-1)) + .detail("LogEndVersion", metaLogEnd.orDefault(-1)); + + metaLogBegin = Optional(); + metaLogEnd = Optional(); + } + + // If the unreliable end version is not set or is < expiredEndVersion then default it to expiredEndVersion + if(!metaUnreliableEnd.present() || metaUnreliableEnd.get() < metaExpiredEnd.orDefault(0)) + metaUnreliableEnd = metaExpiredEnd; + + desc.unreliableEndVersion = metaUnreliableEnd; + desc.expiredEndVersion = metaExpiredEnd; + + // Start scanning for files at a version after which nothing has been deleted by an expire or 0 + state Version scanBegin = desc.unreliableEndVersion.orDefault(0); + state Version scanEnd = std::numeric_limits::max(); + // Use the known log range if present - if(begin.present() && end.present()) { - // Logs are assumed to be contiguious between begin and max(begin, end), so initalize desc accordingly - // The use of max() is to allow for a stale end version that has been exceeded by begin version - desc.minLogBegin = begin.get(); - desc.maxLogEnd = std::max(begin.get(), end.get()); + if(metaLogBegin.present() && metaLogEnd.present()) { + // Logs are assumed to be contiguious between metaLogBegin and metaLogEnd, so initalize desc accordingly + desc.minLogBegin = metaLogBegin.get(); + desc.maxLogEnd = 
metaLogEnd.get(); desc.contiguousLogEnd = desc.maxLogEnd; - // Begin file scan at the contiguous log end version + // Move scanBegin forward (higher) to the contiguous log end version scanBegin = desc.contiguousLogEnd.get(); } @@ -570,20 +608,26 @@ public: } } - // Try to update the saved log versions if they are not set and we have values for them, - // but ignore errors in the update attempt in case the container is not writeable - // Also update logEndVersion if it has a value but it is less than contiguousLogEnd + // If the log metadata begin/end versions are missing (or treated as missing due to invalidity) or + // differ from the newly calculated values for minLogBegin and contiguousLogEnd, respectively, + // then attempt to update the metadata in the backup container but ignore errors in case the + // container is not writeable. try { state Future updates = Void(); - if(desc.minLogBegin.present() && !begin.present()) + + if(desc.minLogBegin.present() && metaLogBegin != desc.minLogBegin) { updates = updates && bc->logBeginVersion().set(desc.minLogBegin.get()); - if(desc.contiguousLogEnd.present() && (!end.present() || end.get() < desc.contiguousLogEnd.get()) ) + } + + if(desc.contiguousLogEnd.present() && metaLogEnd != desc.contiguousLogEnd) { updates = updates && bc->logEndVersion().set(desc.contiguousLogEnd.get()); + } + Void _ = wait(updates); } catch(Error &e) { if(e.code() == error_code_actor_cancelled) throw; - TraceEvent(SevWarn, "BackupContainerSafeVersionUpdateFailure").detail("URL", bc->getURL()); + TraceEvent(SevWarn, "BackupContainerMetadataUpdateFailure").detail("URL", bc->getURL()); } for(auto &s : desc.snapshots) { @@ -631,11 +675,15 @@ public: if(restorableBeginVersion < expireEndVersion) throw backup_cannot_expire(); - state Version scanBegin = 0; - // Get the backup description. 
state BackupDescription desc = wait(bc->describeBackup()); + // If the expire request is to a version at or before the previous version to which data was already deleted + // then do nothing and just return + if(expireEndVersion <= desc.expiredEndVersion.orDefault(-1)) { + return Void(); + } + // Assume force is needed, then try to prove otherwise. // Force is required if there is not a restorable snapshot which both // - begins at or after expireEndVersion @@ -648,27 +696,12 @@ public: } } - // Get metadata - state Optional expiredEnd; - state Optional logBegin; - state Optional logEnd; - Void _ = wait(store(bc->expiredEndVersion().get(), expiredEnd) && store(bc->logBeginVersion().get(), logBegin) && store(bc->logEndVersion().get(), logEnd)); + // Start scan for files to delete at the last completed expire operation's end or 0. + state Version scanBegin = desc.expiredEndVersion.orDefault(0); - // Update scan range if expiredEnd is present - if(expiredEnd.present()) { - if(expireEndVersion <= expiredEnd.get()) { - // If the expire request is to the version already expired to then there is no work to do so return true - return Void(); - } - scanBegin = expiredEnd.get(); - } - - TraceEvent("BackupContainerFileSystem") + TraceEvent("BackupContainerFileSystemExpire") .detail("ExpireEndVersion", expireEndVersion) - .detail("ScanBeginVersion", scanBegin) - .detail("CachedLogBegin", logBegin.orDefault(-1)) - .detail("CachedLogEnd", logEnd.orDefault(-1)) - .detail("CachedExpiredEnd", expiredEnd.orDefault(-1)); + .detail("ScanBeginVersion", scanBegin); // Get log files that contain any data at or before expireEndVersion state std::vector logs = wait(bc->listLogFiles(scanBegin, expireEndVersion - 1)); @@ -720,37 +753,20 @@ public: } desc = BackupDescription(); + for(auto &f : toDelete) { + printf("delete: %s\n", f.c_str()); + } + // If some files to delete were found AND force is needed AND the force option is NOT set, then fail if(!toDelete.empty() && forceNeeded && !force) 
throw backup_cannot_expire(); - // We are about to start deleting files, at which point no data prior to the expire end version can be - // safely assumed to exist. The [logBegin, logEnd) range from the container's metadata describes - // a range of log versions which can be assumed to exist, so if the range of data being deleted overlaps - // that range then the metadata range must be updated. - - // If we're expiring the entire log range described by the metadata then clear both metadata values - if(logEnd.present() && logEnd.get() < expireEndVersion) { - if(logBegin.present()) - Void _ = wait(bc->logBeginVersion().clear()); - if(logEnd.present()) - Void _ = wait(bc->logEndVersion().clear()); - } - else { - // If we are expiring to a point within the metadata range then update the begin if we have a new - // log begin version (which we should!) or clear the metadata range if we do not (which would be - // repairing the metadata from an incorrect state) - if(logBegin.present() && logBegin.get() < expireEndVersion) { - if(newLogBeginVersion.present()) { - Void _ = wait(bc->logBeginVersion().set(newLogBeginVersion.get())); - } - else { - if(logBegin.present()) - Void _ = wait(bc->logBeginVersion().clear()); - if(logEnd.present()) - Void _ = wait(bc->logEndVersion().clear()); - } - } + // We are about to start deleting files, at which point all data prior to expireEndVersion is considered + // 'unreliable' as some or all of it will be missing. 
So before deleting anything, read unreliableEndVersion + // (don't use cached value in desc) and update its value if it is missing or < expireEndVersion + Optional metaUnreliableEnd = wait(bc->unreliableEndVersion().get()); + if(metaUnreliableEnd.orDefault(0) < expireEndVersion) { + Void _ = wait(bc->unreliableEndVersion().set(expireEndVersion)); } // Delete files, but limit parallelism because the file list could use a lot of memory and the corresponding @@ -776,8 +792,12 @@ public: } } - // Update the expiredEndVersion property. - Void _ = wait(bc->expiredEndVersion().set(expireEndVersion)); + // Update the expiredEndVersion metadata to indicate that everything prior to that version has been + // successfully deleted. a if existing value is < expiredEndVersion + Optional metaExpiredEnd = wait(bc->expiredEndVersion().get()); + if(metaExpiredEnd.orDefault(0) < expireEndVersion) { + Void _ = wait(bc->expiredEndVersion().set(expireEndVersion)); + } return Void(); } @@ -858,18 +878,19 @@ private: public: // To avoid the need to scan the underyling filesystem in many cases, some important version boundaries are stored in named files. - // These files can be deleted from the filesystem if they appear to be wrong or corrupt, and full scans will done - // when needed. + // These versions also indicate what version ranges are known to be deleted or partially deleted. // - // The three versions below, when present, describe 4 version ranges which collectively cover the entire version timeline. - // 0 - expiredEndVersion: All files in this range have been deleted - // expiredEndVersion - presentBeginVersion: Files in this range *may* have been deleted so their presence must not be assumed. - // presentBeginVersion - presentEndVersion: Files in this range have NOT been deleted by any FDB backup operations. - // presentEndVersion - infinity: Files in this range may or may not exist yet. Scan to find what is there. 
+ // The values below describe version ranges as follows: + // 0 - expiredEndVersion All files in this range have been deleted + // expiredEndVersion - unreliableEndVersion Some files in this range may have been deleted. + // + // logBeginVersion - logEnd Log files are contiguous in this range and have NOT been deleted by fdbbackup + // logEnd - infinity Files in this range may or may not exist yet // VersionProperty logBeginVersion() { return {Reference::addRef(this), "log_begin_version"}; } VersionProperty logEndVersion() { return {Reference::addRef(this), "log_end_version"}; } VersionProperty expiredEndVersion() { return {Reference::addRef(this), "expired_end_version"}; } + VersionProperty unreliableEndVersion() { return {Reference::addRef(this), "unreliable_end_version"}; } ACTOR static Future writeVersionProperty(Reference bc, std::string path, Version v) { try { diff --git a/fdbclient/BackupContainer.h b/fdbclient/BackupContainer.h index 58bc536d09..bbc696f96a 100644 --- a/fdbclient/BackupContainer.h +++ b/fdbclient/BackupContainer.h @@ -108,10 +108,19 @@ struct BackupDescription { std::string url; std::vector snapshots; int64_t snapshotBytes; + // The version before which everything has been deleted by an expire + Optional expiredEndVersion; + // The latest version before which at least some data has been deleted by an expire + Optional unreliableEndVersion; + // The minimum log version in the backup Optional minLogBegin; + // The maximum log version in the backup Optional maxLogEnd; + // The maximum log version for which there is contiguous log version coverage extending back to minLogBegin Optional contiguousLogEnd; + // The maximum version which this backup can be used to restore to Optional maxRestorableVersion; + // The minimum version which this backup can be used to restore to Optional minRestorableVersion; std::string extendedDetail; // Freeform container-specific info. 
diff --git a/fdbclient/BlobStore.actor.cpp b/fdbclient/BlobStore.actor.cpp index 911947634a..32d0864f70 100644 --- a/fdbclient/BlobStore.actor.cpp +++ b/fdbclient/BlobStore.actor.cpp @@ -258,8 +258,17 @@ ACTOR Future deleteObject_impl(Reference b, std::string std::string resource = std::string("/") + bucket + "/" + object; HTTP::Headers headers; + // 200 or 204 means object successfully deleted, 404 means it already doesn't exist, so any of those are considered successful Reference r = wait(b->doRequest("DELETE", resource, headers, NULL, 0, {200, 204, 404})); - // 200 means object deleted, 404 means it doesn't exist already, so either success code passed above is fine. + + // But if the object already did not exist then the 'delete' is assumed to be successful but a warning is logged. + if(r->code == 404) { + TraceEvent(SevWarnAlways, "BlobStoreEndpointDeleteObjectMissing") + .detail("Host", b->host) + .detail("Bucket", bucket) + .detail("Object", object); + } + return Void(); } From 5951e9d577241df2951b9888b7907dc798a835cb Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Sun, 16 Dec 2018 00:33:30 -0800 Subject: [PATCH 21/55] Added backup URL and exceptions to trace events where applicable. 
--- fdbclient/BackupContainer.actor.cpp | 37 ++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index b8987592a8..d8e88b5f4a 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -535,6 +535,7 @@ public: } TraceEvent("BackupContainerMetadata") + .detail("URL", bc->getURL()) .detail("ExpiredEndVersion", metaExpiredEnd.orDefault(-1)) .detail("UnreliableEndVersion", metaUnreliableEnd.orDefault(-1)) .detail("LogBeginVersion", metaLogBegin.orDefault(-1)) @@ -551,6 +552,7 @@ public: || metaLogEnd.get() < metaUnreliableEnd.orDefault(-1) ) { TraceEvent(SevWarnAlways, "BackupContainerMetadataInvalid") + .detail("URL", bc->getURL()) .detail("LogBeginVersion", metaLogBegin.orDefault(-1)) .detail("LogEndVersion", metaLogEnd.orDefault(-1)); @@ -627,7 +629,9 @@ public: } catch(Error &e) { if(e.code() == error_code_actor_cancelled) throw; - TraceEvent(SevWarn, "BackupContainerMetadataUpdateFailure").detail("URL", bc->getURL()); + TraceEvent(SevWarn, "BackupContainerMetadataUpdateFailure") + .error(e) + .detail("URL", bc->getURL()); } for(auto &s : desc.snapshots) { @@ -700,6 +704,7 @@ public: state Version scanBegin = desc.expiredEndVersion.orDefault(0); TraceEvent("BackupContainerFileSystemExpire") + .detail("URL", bc->getURL()) .detail("ExpireEndVersion", expireEndVersion) .detail("ScanBeginVersion", scanBegin); @@ -900,7 +905,10 @@ public: Void _ = wait(f->finish()); return Void(); } catch(Error &e) { - TraceEvent(SevWarn, "BackupContainerWritePropertyFailed").error(e).detail("Path", path); + TraceEvent(SevWarn, "BackupContainerWritePropertyFailed") + .error(e) + .detail("URL", bc->getURL()) + .detail("Path", path); throw; } } @@ -917,12 +925,20 @@ public: if(rs == size && sscanf(s.c_str(), "%lld%n", &v, &len) == 1 && len == size) return v; - TraceEvent(SevWarn, "BackupContainerInvalidProperty"); + TraceEvent(SevWarn, 
"BackupContainerInvalidProperty") + .detail("URL", bc->getURL()) + .detail("Path", path); + throw backup_invalid_info(); } catch(Error &e) { if(e.code() == error_code_file_not_found) return Optional(); - TraceEvent(SevWarn, "BackupContainerReadPropertyFailed").error(e).detail("Path", path); + + TraceEvent(SevWarn, "BackupContainerReadPropertyFailed") + .error(e) + .detail("URL", bc->getURL()) + .detail("Path", path); + throw; } } @@ -1305,10 +1321,12 @@ Reference IBackupContainer::openContainer(std::string url) throw; TraceEvent m(SevWarn, "BackupContainer"); - m.detail("Description", "Invalid container specification. See help.").detail("URL", url); - + m.detail("Description", "Invalid container specification. See help."); + m.detail("URL", url); + m.error(e); if(e.code() == error_code_backup_invalid_url) m.detail("LastOpenError", lastOpenError); + throw; } } @@ -1349,10 +1367,13 @@ ACTOR Future> listContainers_impl(std::string baseURL) throw; TraceEvent m(SevWarn, "BackupContainer"); - m.detail("Description", "Invalid backup container URL prefix. See help.").detail("URL", baseURL); - + + m.detail("Description", "Invalid backup container URL prefix. See help."); + m.detail("URL", baseURL); + m.error(e); if(e.code() == error_code_backup_invalid_url) m.detail("LastOpenError", IBackupContainer::lastOpenError); + throw; } } From dac1827d23ff925b78c5649ed31802db270a20db Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Sun, 16 Dec 2018 00:41:38 -0800 Subject: [PATCH 22/55] Backup describe's "deep scan" mode should only ignore log begin/end versions, not expire and unreliable end versions. 
--- fdbclient/BackupContainer.actor.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index d8e88b5f4a..f10f5ba862 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -528,10 +528,14 @@ public: state Optional metaExpiredEnd; state Optional metaUnreliableEnd; - // For a deep scan, do not use the version boundary metadata + std::vector> metaReads; + metaReads.push_back(store(bc->expiredEndVersion().get(), metaExpiredEnd)); + metaReads.push_back(store(bc->unreliableEndVersion().get(), metaUnreliableEnd)); + + // Only read log begin/end versions if not doing a deep scan, otherwise scan files and recalculae them. if(!deepScan) { - Void _ = wait(store(bc->logBeginVersion().get(), metaLogBegin) && store(bc->logEndVersion().get(), metaLogEnd) - && store(bc->expiredEndVersion().get(), metaExpiredEnd) && store(bc->unreliableEndVersion().get(), metaUnreliableEnd)); + metaReads.push_back(store(bc->logBeginVersion().get(), metaLogBegin)); + metaReads.push_back(store(bc->logEndVersion().get(), metaLogEnd)); } TraceEvent("BackupContainerMetadata") From 9ef9041fba904e3551e3d3bcd324982f37737a93 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 17 Dec 2018 13:13:35 -0800 Subject: [PATCH 23/55] Bug fix, metadata read futures were moved to a vector but then not waited on. 
--- fdbclient/BackupContainer.actor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index f10f5ba862..6bada6ae47 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -538,6 +538,8 @@ public: metaReads.push_back(store(bc->logEndVersion().get(), metaLogEnd)); } + Void _ = wait(waitForAll(metaReads)); + TraceEvent("BackupContainerMetadata") .detail("URL", bc->getURL()) .detail("ExpiredEndVersion", metaExpiredEnd.orDefault(-1)) From 779a4b1e3b6ee40fde768bb5f194ab04ee79287c Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 17 Dec 2018 18:39:14 -0800 Subject: [PATCH 24/55] Static de-initialization order isn't well thought out so use flushAndExit() instead of returning from main after runNetwork() has been called. --- fdbbackup/backup.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index f9e4276e9c..043dcf16d6 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -3009,5 +3009,5 @@ int main(int argc, char* argv[]) { status = FDB_EXIT_MAIN_EXCEPTION; } - return status; + flushAndExit(status); } From 69d847dbbd3138c0ce7c0abe48f15cb42834e07d Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Tue, 18 Dec 2018 04:33:37 -0800 Subject: [PATCH 25/55] Backup describe can now analyze a backup assuming log data starts at a given version. This is used by expire both as an optimization and because doing so enables expire to repair a bad metadata state that can result from a cancelled or failed expire operation from fdbbackup <= 6.0.17. ListLogFiles() and ListRangeFiles() no longer sort results as in most cases the sort is not required and the result set can be very large. Describe will now update minLogBegin metadata to the beginning of the log range known to be present when possible. 
Several serial log and range list pairings are now done in parallel. --- fdbclient/BackupContainer.actor.cpp | 121 +++++++++++++++++----------- fdbclient/BackupContainer.h | 9 ++- 2 files changed, 78 insertions(+), 52 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 6bada6ae47..f22432cbda 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -400,8 +400,7 @@ public: return writeKeyspaceSnapshotFile_impl(Reference::addRef(this), fileNames, totalBytes); }; - // List log files which contain data at any version >= beginVersion and <= targetVersion - // Lists files in sorted order by begin version. Does not check that results are non overlapping or contiguous. + // List log files, unsorted, which contain data at any version >= beginVersion and <= targetVersion Future> listLogFiles(Version beginVersion = 0, Version targetVersion = std::numeric_limits::max()) { // The first relevant log file could have a begin version less than beginVersion based on the knobs which determine log file range size, // so start at an earlier version adjusted by how many versions a file could contain. @@ -427,12 +426,11 @@ public: if(pathToLogFile(lf, f.first, f.second) && lf.endVersion > beginVersion && lf.beginVersion <= targetVersion) results.push_back(lf); } - std::sort(results.begin(), results.end()); return results; }); } - // List range files which contain data at or between beginVersion and endVersion + // List range files, unsorted, which contain data at or between beginVersion and endVersion // NOTE: This reads the range file folder schema from FDB 6.0.15 and earlier and is provided for backward compatibility Future> old_listRangeFiles(Version beginVersion, Version endVersion) { // Get the cleaned (without slashes) first and last folders that could contain relevant results. 
@@ -458,7 +456,7 @@ public: }); } - // List range files, sorted in version order, which contain data at or between beginVersion and endVersion + // List range files, unsorted, which contain data at or between beginVersion and endVersion // Note: The contents of each top level snapshot.N folder do not necessarily constitute a valid snapshot // and therefore listing files is not how RestoreSets are obtained. // Note: Snapshots partially written using FDB versions prior to 6.0.16 will have some range files stored @@ -487,7 +485,6 @@ public: std::vector results = std::move(newFiles.get()); std::vector oldResults = std::move(oldFiles.get()); results.insert(results.end(), std::make_move_iterator(oldResults.begin()), std::make_move_iterator(oldResults.end())); - std::sort(results.begin(), results.end()); return results; }); } @@ -518,7 +515,7 @@ public: return dumpFileList_impl(Reference::addRef(this)); } - ACTOR static Future describeBackup_impl(Reference bc, bool deepScan) { + ACTOR static Future describeBackup_impl(Reference bc, bool deepScan, Version logStartVersionOverride) { state BackupDescription desc; desc.url = bc->getURL(); @@ -532,7 +529,7 @@ public: metaReads.push_back(store(bc->expiredEndVersion().get(), metaExpiredEnd)); metaReads.push_back(store(bc->unreliableEndVersion().get(), metaUnreliableEnd)); - // Only read log begin/end versions if not doing a deep scan, otherwise scan files and recalculae them. + // Only read log begin/end versions if not doing a deep scan, otherwise scan files and recalculate them. 
if(!deepScan) { metaReads.push_back(store(bc->logBeginVersion().get(), metaLogBegin)); metaReads.push_back(store(bc->logEndVersion().get(), metaLogEnd)); @@ -540,58 +537,75 @@ public: Void _ = wait(waitForAll(metaReads)); - TraceEvent("BackupContainerMetadata") + TraceEvent("BackupContainerDescribe") .detail("URL", bc->getURL()) - .detail("ExpiredEndVersion", metaExpiredEnd.orDefault(-1)) - .detail("UnreliableEndVersion", metaUnreliableEnd.orDefault(-1)) - .detail("LogBeginVersion", metaLogBegin.orDefault(-1)) - .detail("LogEndVersion", metaLogEnd.orDefault(-1)); + .detail("LogStartVersionOverride", logStartVersionOverride) + .detail("ExpiredEndVersion", metaExpiredEnd.orDefault(invalidVersion)) + .detail("UnreliableEndVersion", metaUnreliableEnd.orDefault(invalidVersion)) + .detail("LogBeginVersion", metaLogBegin.orDefault(invalidVersion)) + .detail("LogEndVersion", metaLogEnd.orDefault(invalidVersion)); - // Don't use metaLogBegin or metaLogEnd if any of the following are true + // If the logStartVersionOverride is set then ensure that unreliableEndVersion is at least as much + if(logStartVersionOverride != invalidVersion && metaUnreliableEnd.orDefault(invalidVersion) < logStartVersionOverride) { + metaUnreliableEnd = logStartVersionOverride; + } + + // Don't use metaLogBegin or metaLogEnd if any of the following are true, the safest + // thing to do is rescan to verify log continuity and get exact begin/end versions // - either are missing - // - metaLogEnd <= metaLogBegin - // - metaLogEnd < metaExpiredEnd - // - metaLogEnd < metaUnreliableEnd + // - metaLogEnd <= metaLogBegin (invalid range) + // - metaLogEnd < metaExpiredEnd (log continuity exists in missing data range) + // - metaLogEnd < metaUnreliableEnd (log continuity exists in incomplete data range) if(!metaLogBegin.present() || !metaLogEnd.present() || metaLogEnd.get() <= metaLogBegin.get() - || metaLogEnd.get() < metaExpiredEnd.orDefault(-1) - || metaLogEnd.get() < metaUnreliableEnd.orDefault(-1) + || 
metaLogEnd.get() < metaExpiredEnd.orDefault(invalidVersion) + || metaLogEnd.get() < metaUnreliableEnd.orDefault(invalidVersion) ) { TraceEvent(SevWarnAlways, "BackupContainerMetadataInvalid") .detail("URL", bc->getURL()) - .detail("LogBeginVersion", metaLogBegin.orDefault(-1)) - .detail("LogEndVersion", metaLogEnd.orDefault(-1)); + .detail("ExpiredEndVersion", metaExpiredEnd.orDefault(invalidVersion)) + .detail("UnreliableEndVersion", metaUnreliableEnd.orDefault(invalidVersion)) + .detail("LogBeginVersion", metaLogBegin.orDefault(invalidVersion)) + .detail("LogEndVersion", metaLogEnd.orDefault(invalidVersion)); metaLogBegin = Optional(); metaLogEnd = Optional(); } - // If the unreliable end version is not set or is < expiredEndVersion then default it to expiredEndVersion + // If the unreliable end version is not set or is < expiredEndVersion then increase it to expiredEndVersion. + // Describe does not update unreliableEnd in the backup metadata for safety reasons as there is no + // compare-and-set operation to atomically change it and an expire process could be advancing it simultaneously. if(!metaUnreliableEnd.present() || metaUnreliableEnd.get() < metaExpiredEnd.orDefault(0)) metaUnreliableEnd = metaExpiredEnd; desc.unreliableEndVersion = metaUnreliableEnd; desc.expiredEndVersion = metaExpiredEnd; - // Start scanning for files at a version after which nothing has been deleted by an expire or 0 + // Start scanning at the end of the unreliable version range, which is the version before which data is likely + // missing because an expire process has operated on that range. 
state Version scanBegin = desc.unreliableEndVersion.orDefault(0); state Version scanEnd = std::numeric_limits::max(); // Use the known log range if present + // Logs are assumed to be contiguious between metaLogBegin and metaLogEnd, so initalize desc accordingly if(metaLogBegin.present() && metaLogEnd.present()) { - // Logs are assumed to be contiguious between metaLogBegin and metaLogEnd, so initalize desc accordingly - desc.minLogBegin = metaLogBegin.get(); + // minLogBegin is the greater of the log begin metadata OR the unreliable end version since we can't count + // on log file presence before that version. + desc.minLogBegin = std::max(metaLogBegin.get(), desc.unreliableEndVersion.orDefault(0)); + + // Set the maximum known end version of a log file, so far, which is also the assumed contiguous log file end version desc.maxLogEnd = metaLogEnd.get(); desc.contiguousLogEnd = desc.maxLogEnd; - // Move scanBegin forward (higher) to the contiguous log end version + // Advance scanBegin to the contiguous log end version scanBegin = desc.contiguousLogEnd.get(); } - std::vector snapshots = wait(bc->listKeyspaceSnapshots()); - desc.snapshots = snapshots; + state std::vector logs; + Void _ = wait(store(bc->listLogFiles(scanBegin, scanEnd), logs) && store(bc->listKeyspaceSnapshots(), desc.snapshots)); - std::vector logs = wait(bc->listLogFiles(scanBegin, scanEnd)); + // List logs in version order so log continuity can be analyzed + std::sort(logs.begin(), logs.end()); if(!logs.empty()) { desc.maxLogEnd = logs.rbegin()->endVersion; @@ -623,7 +637,8 @@ public: try { state Future updates = Void(); - if(desc.minLogBegin.present() && metaLogBegin != desc.minLogBegin) { + // Only update minLogBegin if we did NOT use a log start override version + if(logStartVersionOverride == invalidVersion && desc.minLogBegin.present() && metaLogBegin != desc.minLogBegin) { updates = updates && bc->logBeginVersion().set(desc.minLogBegin.get()); } @@ -677,8 +692,8 @@ public: } // Uses the 
virtual methods to describe the backup contents - Future describeBackup(bool deepScan = false) { - return describeBackup_impl(Reference::addRef(this), deepScan); + Future describeBackup(bool deepScan, Version logStartVersionOverride) { + return describeBackup_impl(Reference::addRef(this), deepScan, logStartVersionOverride); } ACTOR static Future expireData_impl(Reference bc, Version expireEndVersion, bool force, Version restorableBeginVersion) { @@ -686,11 +701,11 @@ public: throw backup_cannot_expire(); // Get the backup description. - state BackupDescription desc = wait(bc->describeBackup()); + state BackupDescription desc = wait(bc->describeBackup(false, expireEndVersion)); // If the expire request is to a version at or before the previous version to which data was already deleted // then do nothing and just return - if(expireEndVersion <= desc.expiredEndVersion.orDefault(-1)) { + if(expireEndVersion <= desc.expiredEndVersion.orDefault(invalidVersion)) { return Void(); } @@ -714,25 +729,31 @@ public: .detail("ExpireEndVersion", expireEndVersion) .detail("ScanBeginVersion", scanBegin); - // Get log files that contain any data at or before expireEndVersion - state std::vector logs = wait(bc->listLogFiles(scanBegin, expireEndVersion - 1)); - // Get range files up to and including expireEndVersion - state std::vector ranges = wait(bc->listRangeFiles(scanBegin, expireEndVersion - 1)); + state std::vector logs; + state std::vector ranges; + + // Get log files or range files that contain any data at or before expireEndVersion + Void _ = wait(store(bc->listLogFiles(scanBegin, expireEndVersion - 1), logs) && store(bc->listRangeFiles(scanBegin, expireEndVersion - 1), ranges)); // The new logBeginVersion will be taken from the last log file, if there is one state Optional newLogBeginVersion; if(!logs.empty()) { - LogFile &last = logs.back(); + // Linear scan the unsorted logs to find the latest one in sorted order + LogFile &last = *std::max_element(logs.begin(), 
logs.end()); + // If the last log ends at expireEndVersion then that will be the next log begin if(last.endVersion == expireEndVersion) { newLogBeginVersion = expireEndVersion; } else { // If the last log overlaps the expiredEnd then use the log's begin version and move the expiredEnd - // back to match it. + // back to match it and keep the last log file if(last.endVersion > expireEndVersion) { newLogBeginVersion = last.beginVersion; - logs.pop_back(); + + // Instead of modifying this potentially very large vector, just clear LogFile + last = LogFile(); + expireEndVersion = newLogBeginVersion.get(); } } @@ -743,7 +764,10 @@ public: // Move filenames out of vector then destroy it to save memory for(auto const &f : logs) { - toDelete.push_back(std::move(f.fileName)); + // We may have cleared the last log file earlier so skip any empty filenames + if(!f.fileName.empty()) { + toDelete.push_back(std::move(f.fileName)); + } } logs.clear(); @@ -764,10 +788,6 @@ public: } desc = BackupDescription(); - for(auto &f : toDelete) { - printf("delete: %s\n", f.c_str()); - } - // If some files to delete were found AND force is needed AND the force option is NOT set, then fail if(!toDelete.empty() && forceNeeded && !force) throw backup_cannot_expire(); @@ -804,7 +824,7 @@ public: } // Update the expiredEndVersion metadata to indicate that everything prior to that version has been - // successfully deleted. 
a if existing value is < expiredEndVersion + // successfully deleted if the current version is lower or missing Optional metaExpiredEnd = wait(bc->expiredEndVersion().get()); if(metaExpiredEnd.orDefault(0) < expireEndVersion) { Void _ = wait(bc->expiredEndVersion().set(expireEndVersion)); @@ -839,7 +859,10 @@ public: if(snapshot.get().beginVersion == snapshot.get().endVersion && snapshot.get().endVersion == targetVersion) return Optional(restorable); - std::vector logs = wait(bc->listLogFiles(snapshot.get().beginVersion, targetVersion)); + state std::vector logs = wait(bc->listLogFiles(snapshot.get().beginVersion, targetVersion)); + + // List logs in version order so log continuity can be analyzed + std::sort(logs.begin(), logs.end()); // If there are logs and the first one starts at or before the snapshot begin version then proceed if(!logs.empty() && logs.front().beginVersion <= snapshot.get().beginVersion) { @@ -1103,7 +1126,7 @@ public: Future deleteContainer(int *pNumDeleted) { // In order to avoid deleting some random directory due to user error, first describe the backup // and make sure it has something in it. - return map(describeBackup(), [=](BackupDescription const &desc) { + return map(describeBackup(false, invalidVersion), [=](BackupDescription const &desc) { // If the backup has no snapshots and no logs then it's probably not a valid backup if(desc.snapshots.size() == 0 && !desc.minLogBegin.present()) throw backup_invalid_url(); diff --git a/fdbclient/BackupContainer.h b/fdbclient/BackupContainer.h index bbc696f96a..f74b2c20a2 100644 --- a/fdbclient/BackupContainer.h +++ b/fdbclient/BackupContainer.h @@ -186,9 +186,12 @@ public: // updated with the count of deleted files so that progress can be seen. virtual Future deleteContainer(int *pNumDeleted = nullptr) = 0; - // Return key details about a backup's contents, possibly using cached or stored metadata - // unless deepScan is true. 
- virtual Future describeBackup(bool deepScan = false) = 0; + // Return key details about a backup's contents. + // Unless deepScan is true, use cached metadata, if present, as initial contiguous available log range. + // If logStartVersionOverride is given, log data prior to that version will be ignored for the purposes + // of this describe operation. This can be used to calculate what the restorability of a backup would + // be after deleting all data prior to logStartVersionOverride. + virtual Future describeBackup(bool deepScan = false, Version logStartVersionOverride = invalidVersion) = 0; virtual Future dumpFileList() = 0; From afa243bc970e8da44d65830a6ffd68d0cad33b4e Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Tue, 18 Dec 2018 18:55:44 -0800 Subject: [PATCH 26/55] Added fdbbackup expire options to calculate approximate version boundaries based on a number of days prior to the latest log file found in the backup container. This enables expiration operations based on time (with reasonable precision) without accessing the source cluster. 
--- fdbbackup/backup.actor.cpp | 26 ++++++++++++++-- fdbclient/BackupContainer.actor.cpp | 47 ++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 043dcf16d6..c13d5e1b25 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -91,7 +91,8 @@ enum enumRestoreType { enum { // Backup constants OPT_DESTCONTAINER, OPT_SNAPSHOTINTERVAL, OPT_ERRORLIMIT, OPT_NOSTOPWHENDONE, - OPT_EXPIRE_BEFORE_VERSION, OPT_EXPIRE_BEFORE_DATETIME, OPT_EXPIRE_RESTORABLE_AFTER_VERSION, OPT_EXPIRE_RESTORABLE_AFTER_DATETIME, + OPT_EXPIRE_BEFORE_VERSION, OPT_EXPIRE_BEFORE_DATETIME, OPT_EXPIRE_DELETE_BEFORE_DAYS, + OPT_EXPIRE_RESTORABLE_AFTER_VERSION, OPT_EXPIRE_RESTORABLE_AFTER_DATETIME, OPT_EXPIRE_MIN_RESTORABLE_DAYS, OPT_BASEURL, OPT_BLOB_CREDENTIALS, OPT_DESCRIBE_DEEP, OPT_DESCRIBE_TIMESTAMPS, // Backup and Restore constants @@ -336,6 +337,8 @@ CSimpleOpt::SOption g_rgBackupExpireOptions[] = { { OPT_EXPIRE_RESTORABLE_AFTER_DATETIME, "--restorable_after_timestamp", SO_REQ_SEP }, { OPT_EXPIRE_BEFORE_VERSION, "--expire_before_version", SO_REQ_SEP }, { OPT_EXPIRE_BEFORE_DATETIME, "--expire_before_timestamp", SO_REQ_SEP }, + { OPT_EXPIRE_MIN_RESTORABLE_DAYS, "--min_restorable_days", SO_REQ_SEP }, + { OPT_EXPIRE_DELETE_BEFORE_DAYS, "--delete_before_days", SO_REQ_SEP }, SO_END_OF_OPTIONS }; @@ -724,10 +727,16 @@ static void printBackupUsage(bool devhelp) { " in the database to obtain a cutoff version very close to the timestamp given in YYYY-MM-DD.HH:MI:SS format (UTC).\n"); printf(" --expire_before_version VERSION\n" " Version cutoff for expire operations. Deletes data files containing no data at or after VERSION.\n"); + printf(" --delete_before_days NUM_DAYS\n" + " Another way to specify version cutoff for expire operations. 
Deletes data files containing no data at or after a\n" + " version approximately NUM_DAYS days worth of versions prior to the latest log version in the backup.\n"); printf(" --restorable_after_timestamp DATETIME\n" " For expire operations, set minimum acceptable restorability to the version equivalent of DATETIME and later.\n"); printf(" --restorable_after_version VERSION\n" " For expire operations, set minimum acceptable restorability to the VERSION and later.\n"); + printf(" --min_restorable_days NUM_DAYS\n" + " For expire operations, set minimum acceptable restorability to approximately NUM_DAYS days worth of versions\n" + " prior to the latest log version in the backup.\n"); printf(" --version_timestamps\n"); printf(" For describe operations, lookup versions in the database to obtain timestamps. A cluster file is required.\n"); printf(" -f, --force For expire operations, force expiration even if minimum restorability would be violated.\n"); @@ -1843,7 +1852,10 @@ ACTOR Future expireBackupData(const char *name, std::string destinationCon try { Reference c = openBackupContainer(name, destinationContainer); Void _ = wait(c->expireData(endVersion, force, restorableAfterVersion)); - printf("All data before version %lld is deleted.\n", endVersion); + if(endVersion < 0) + printf("All data before %lld versions (%lld days) prior to latest backup log has been deleted.\n", -endVersion, -endVersion / ((int64_t)24 * 3600 * CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); + else + printf("All data before version %lld has been deleted.\n", endVersion); } catch (Error& e) { if(e.code() == error_code_actor_cancelled) @@ -2396,6 +2408,8 @@ int main(int argc, char* argv[]) { break; case OPT_EXPIRE_BEFORE_VERSION: case OPT_EXPIRE_RESTORABLE_AFTER_VERSION: + case OPT_EXPIRE_MIN_RESTORABLE_DAYS: + case OPT_EXPIRE_DELETE_BEFORE_DAYS: { const char* a = args->OptionArg(); long long ver = 0; @@ -2404,7 +2418,13 @@ int main(int argc, char* argv[]) { printHelpTeaser(argv[0]); return 
FDB_EXIT_ERROR; } - if(optId == OPT_EXPIRE_BEFORE_VERSION) + + // Interpret the value as days worth of versions relative to now (negative) + if(optId == OPT_EXPIRE_MIN_RESTORABLE_DAYS || optId == OPT_EXPIRE_DELETE_BEFORE_DAYS) { + ver = -ver * 24 * 60 * 60 * CLIENT_KNOBS->CORE_VERSIONSPERSECOND; + } + + if(optId == OPT_EXPIRE_BEFORE_VERSION || optId == OPT_EXPIRE_DELETE_BEFORE_DAYS) expireVersion = ver; else expireRestorableAfterVersion = ver; diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index f22432cbda..6cbabc4e70 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -515,10 +515,37 @@ public: return dumpFileList_impl(Reference::addRef(this)); } + static Version resolveRelativeVersion(Optional max, Version v, const char *name, Error e) { + if(v == invalidVersion) { + TraceEvent(SevError, "BackupExpireInvalidVersion").detail(name, v); + throw e; + } + if(v < 0) { + if(!max.present()) { + TraceEvent(SevError, "BackupExpireCannotResolveRelativeVersion").detail(name, v); + throw e; + } + v += max.get(); + } + return v; + } + ACTOR static Future describeBackup_impl(Reference bc, bool deepScan, Version logStartVersionOverride) { state BackupDescription desc; desc.url = bc->getURL(); + TraceEvent("BackupContainerDescribe1") + .detail("URL", bc->getURL()) + .detail("LogStartVersionOverride", logStartVersionOverride); + + // If logStartVersion is relative, then first do a recursive call without it to find the max log version + // from which to resolve the relative version. + // This could be handled more efficiently without recursion but it's tricky, this will do for now. 
+ if(logStartVersionOverride != invalidVersion && logStartVersionOverride < 0) { + BackupDescription tmp = wait(bc->describeBackup(false, invalidVersion)); + logStartVersionOverride = resolveRelativeVersion(tmp.maxLogEnd, logStartVersionOverride, "LogStartVersionOverride", invalid_option_value()); + } + // Get metadata versions state Optional metaLogBegin; state Optional metaLogEnd; @@ -537,7 +564,7 @@ public: Void _ = wait(waitForAll(metaReads)); - TraceEvent("BackupContainerDescribe") + TraceEvent("BackupContainerDescribe2") .detail("URL", bc->getURL()) .detail("LogStartVersionOverride", logStartVersionOverride) .detail("ExpiredEndVersion", metaExpiredEnd.orDefault(invalidVersion)) @@ -545,7 +572,7 @@ public: .detail("LogBeginVersion", metaLogBegin.orDefault(invalidVersion)) .detail("LogEndVersion", metaLogEnd.orDefault(invalidVersion)); - // If the logStartVersionOverride is set then ensure that unreliableEndVersion is at least as much + // If the logStartVersionOverride is positive (not relative) then ensure that unreliableEndVersion is equal or greater if(logStartVersionOverride != invalidVersion && metaUnreliableEnd.orDefault(invalidVersion) < logStartVersionOverride) { metaUnreliableEnd = logStartVersionOverride; } @@ -697,12 +724,21 @@ public: } ACTOR static Future expireData_impl(Reference bc, Version expireEndVersion, bool force, Version restorableBeginVersion) { - if(restorableBeginVersion < expireEndVersion) - throw backup_cannot_expire(); + TraceEvent("BackupContainerFileSystemExpire1") + .detail("URL", bc->getURL()) + .detail("ExpireEndVersion", expireEndVersion) + .detail("RestorableBeginVersion", restorableBeginVersion); // Get the backup description. 
state BackupDescription desc = wait(bc->describeBackup(false, expireEndVersion)); + // Resolve relative versions using max log version + expireEndVersion = resolveRelativeVersion(desc.maxLogEnd, expireEndVersion, "ExpireEndVersion", backup_cannot_expire()); + restorableBeginVersion = resolveRelativeVersion(desc.maxLogEnd, restorableBeginVersion, "RestorableBeginVersion", backup_cannot_expire()); + + if(restorableBeginVersion < expireEndVersion) + throw backup_cannot_expire(); + // If the expire request is to a version at or before the previous version to which data was already deleted // then do nothing and just return if(expireEndVersion <= desc.expiredEndVersion.orDefault(invalidVersion)) { @@ -724,9 +760,10 @@ public: // Start scan for files to delete at the last completed expire operation's end or 0. state Version scanBegin = desc.expiredEndVersion.orDefault(0); - TraceEvent("BackupContainerFileSystemExpire") + TraceEvent("BackupContainerFileSystemExpire2") .detail("URL", bc->getURL()) .detail("ExpireEndVersion", expireEndVersion) + .detail("RestorableBeginVersion", restorableBeginVersion) .detail("ScanBeginVersion", scanBegin); state std::vector logs; From 172c3f2021c93a5bb6c3c8e260c4feb7283a660b Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 19 Dec 2018 10:35:06 -0800 Subject: [PATCH 27/55] Bug fix in backup describe, do not update log begin/end metadata in backup if describe was given a start version override as it can result in incorrect metadata. 
--- fdbclient/BackupContainer.actor.cpp | 46 ++++++++++++++++------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 6cbabc4e70..6a64ddf92a 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -657,29 +657,33 @@ public: } } - // If the log metadata begin/end versions are missing (or treated as missing due to invalidity) or - // differ from the newly calculated values for minLogBegin and contiguousLogEnd, respectively, - // then attempt to update the metadata in the backup container but ignore errors in case the - // container is not writeable. - try { - state Future updates = Void(); - // Only update minLogBegin if we did NOT use a log start override version - if(logStartVersionOverride == invalidVersion && desc.minLogBegin.present() && metaLogBegin != desc.minLogBegin) { - updates = updates && bc->logBeginVersion().set(desc.minLogBegin.get()); + // Only update stored contiguous log begin and end versions if we did NOT use a log start override. + // Otherwise, a series of describe operations can result in a version range which is actually missing data. + if(logStartVersionOverride == invalidVersion) { + // If the log metadata begin/end versions are missing (or treated as missing due to invalidity) or + // differ from the newly calculated values for minLogBegin and contiguousLogEnd, respectively, + // then attempt to update the metadata in the backup container but ignore errors in case the + // container is not writeable. 
+ try { + state Future updates = Void(); + + if(desc.minLogBegin.present() && metaLogBegin != desc.minLogBegin) { + updates = updates && bc->logBeginVersion().set(desc.minLogBegin.get()); + } + + if(desc.contiguousLogEnd.present() && metaLogEnd != desc.contiguousLogEnd) { + updates = updates && bc->logEndVersion().set(desc.contiguousLogEnd.get()); + } + + Void _ = wait(updates); + } catch(Error &e) { + if(e.code() == error_code_actor_cancelled) + throw; + TraceEvent(SevWarn, "BackupContainerMetadataUpdateFailure") + .error(e) + .detail("URL", bc->getURL()); } - - if(desc.contiguousLogEnd.present() && metaLogEnd != desc.contiguousLogEnd) { - updates = updates && bc->logEndVersion().set(desc.contiguousLogEnd.get()); - } - - Void _ = wait(updates); - } catch(Error &e) { - if(e.code() == error_code_actor_cancelled) - throw; - TraceEvent(SevWarn, "BackupContainerMetadataUpdateFailure") - .error(e) - .detail("URL", bc->getURL()); } for(auto &s : desc.snapshots) { From fd4a62fbfdbe08637417cd97103084262104b858 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 19 Dec 2018 10:36:25 -0800 Subject: [PATCH 28/55] Backup expire will fail earlier if force option is needed but not specified. --- fdbclient/BackupContainer.actor.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 6a64ddf92a..6c741a844a 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -761,6 +761,13 @@ public: } } + // If force is needed but not passed then refuse to expire anything. + // Note that it is possible for there to be no actual files in the backup prior to expireEndVersion, + // if they were externally deleted or an expire operation deleted them but was terminated before + // updating expireEndVersion + if(forceNeeded && !force) + throw backup_cannot_expire(); + // Start scan for files to delete at the last completed expire operation's end or 0. 
state Version scanBegin = desc.expiredEndVersion.orDefault(0); @@ -829,10 +836,6 @@ public: } desc = BackupDescription(); - // If some files to delete were found AND force is needed AND the force option is NOT set, then fail - if(!toDelete.empty() && forceNeeded && !force) - throw backup_cannot_expire(); - // We are about to start deleting files, at which point all data prior to expireEndVersion is considered // 'unreliable' as some or all of it will be missing. So before deleting anything, read unreliableEndVersion // (don't use cached value in desc) and update its value if it is missing or < expireEndVersion From 00568f4011a1a500b6029e3d4dd3ba222ac95d7f Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 19 Dec 2018 13:14:48 -0800 Subject: [PATCH 29/55] Error code was misleading, added comment. --- fdbclient/BackupContainer.actor.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 6c741a844a..011561e1b5 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -737,9 +737,10 @@ public: state BackupDescription desc = wait(bc->describeBackup(false, expireEndVersion)); // Resolve relative versions using max log version - expireEndVersion = resolveRelativeVersion(desc.maxLogEnd, expireEndVersion, "ExpireEndVersion", backup_cannot_expire()); - restorableBeginVersion = resolveRelativeVersion(desc.maxLogEnd, restorableBeginVersion, "RestorableBeginVersion", backup_cannot_expire()); + expireEndVersion = resolveRelativeVersion(desc.maxLogEnd, expireEndVersion, "ExpireEndVersion", invalid_option_value()); + restorableBeginVersion = resolveRelativeVersion(desc.maxLogEnd, restorableBeginVersion, "RestorableBeginVersion", invalid_option_value()); + // It would be impossible to have restorability to any version < expireEndVersion after expiring to that version if(restorableBeginVersion < expireEndVersion) throw 
backup_cannot_expire(); From fcb34a8768e17795727c707399f087488f4aafd4 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 19 Dec 2018 16:53:39 -0800 Subject: [PATCH 30/55] In backup describe, when not using the original cluster to resolve versions to date/time strings the versions will be converted to approximate day deltas from maxLogEndVersion (if available) using core_versionspersecond. --- fdbclient/BackupContainer.actor.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 011561e1b5..0534a044b8 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -110,6 +110,10 @@ std::string BackupDescription::toString() const { else s = format("%lld (unknown)", v); } + else if(maxLogEnd.present()) { + double days = double(maxLogEnd.get() - v) / (CLIENT_KNOBS->CORE_VERSIONSPERSECOND * 24 * 60 * 60); + s = format("%lld (maxLogEnd %s%.2f days)", v, days < 0 ? "+" : "-", days); + } else { s = format("%lld", v); } From 354abebf64d55f9091fa3f0a81cb1146026d7c5c Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Thu, 20 Dec 2018 00:23:26 -0800 Subject: [PATCH 31/55] Added progress reporting to backup expiration. Simplified backup delete progress. 
--- fdbbackup/backup.actor.cpp | 37 +++++++++++++++++++++++++--- fdbclient/BackupContainer.actor.cpp | 38 ++++++++++++++++++++++++++--- fdbclient/BackupContainer.h | 8 +++++- 3 files changed, 76 insertions(+), 7 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index c13d5e1b25..7826ae2b50 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -1851,7 +1851,31 @@ ACTOR Future expireBackupData(const char *name, std::string destinationCon try { Reference c = openBackupContainer(name, destinationContainer); - Void _ = wait(c->expireData(endVersion, force, restorableAfterVersion)); + + state IBackupContainer::ExpireProgress progress; + state std::string lastProgress; + state Future expire = c->expireData(endVersion, force, &progress, restorableAfterVersion); + + loop { + choose { + when(Void _ = wait(delay(5))) { + std::string p = progress.toString(); + if(p != lastProgress) { + int spaces = lastProgress.size() - p.size(); + printf("\r%s%s", p.c_str(), (spaces > 0 ? std::string(spaces, ' ').c_str() : "") ); + lastProgress = p; + } + } + when(Void _ = wait(expire)) { + break; + } + } + } + + std::string p = progress.toString(); + int spaces = lastProgress.size() - p.size(); + printf("\r%s%s\n", p.c_str(), (spaces > 0 ? 
std::string(spaces, ' ').c_str() : "") ); + if(endVersion < 0) printf("All data before %lld versions (%lld days) prior to latest backup log has been deleted.\n", -endVersion, -endVersion / ((int64_t)24 * 3600 * CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); else @@ -1876,17 +1900,24 @@ ACTOR Future deleteBackupContainer(const char *name, std::string destinati state int numDeleted = 0; state Future done = c->deleteContainer(&numDeleted); + state int lastUpdate = -1; + printf("Deleting data...\n"); + loop { choose { when ( Void _ = wait(done) ) { printf("The entire container has been deleted.\n"); break; } - when ( Void _ = wait(delay(3)) ) { - printf("%d files have been deleted so far...\n", numDeleted); + when ( Void _ = wait(delay(5)) ) { + if(numDeleted != lastUpdate) { + printf("\r%d...", numDeleted); + lastUpdate = numDeleted; + } } } } + printf("\n"); } catch (Error& e) { if(e.code() == error_code_actor_cancelled) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 0534a044b8..23cce6a28e 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -47,6 +47,14 @@ Future IBackupFile::appendStringRefWithLen(Standalone s) { return IBackupFile_impl::appendStringRefWithLen(Reference::addRef(this), s); } +std::string IBackupContainer::ExpireProgress::toString() const { + std::string s = step + "..."; + if(total > 0) { + s += format("%d/%d (%.2f%%)", done, total, double(done) / total * 100); + } + return s; +} + std::string formatTime(int64_t t) { time_t curTime = (time_t)t; char buffer[128]; @@ -731,7 +739,12 @@ public: return describeBackup_impl(Reference::addRef(this), deepScan, logStartVersionOverride); } - ACTOR static Future expireData_impl(Reference bc, Version expireEndVersion, bool force, Version restorableBeginVersion) { + ACTOR static Future expireData_impl(Reference bc, Version expireEndVersion, bool force, ExpireProgress *progress, Version restorableBeginVersion) { + if(progress != 
nullptr) { + progress->step = "Describing backup"; + progress->total = 0; + } + TraceEvent("BackupContainerFileSystemExpire1") .detail("URL", bc->getURL()) .detail("ExpireEndVersion", expireEndVersion) @@ -785,6 +798,9 @@ public: state std::vector logs; state std::vector ranges; + if(progress != nullptr) { + progress->step = "Listing files"; + } // Get log files or range files that contain any data at or before expireEndVersion Void _ = wait(store(bc->listLogFiles(scanBegin, expireEndVersion - 1), logs) && store(bc->listRangeFiles(scanBegin, expireEndVersion - 1), ranges)); @@ -844,11 +860,20 @@ public: // We are about to start deleting files, at which point all data prior to expireEndVersion is considered // 'unreliable' as some or all of it will be missing. So before deleting anything, read unreliableEndVersion // (don't use cached value in desc) and update its value if it is missing or < expireEndVersion + if(progress != nullptr) { + progress->step = "Initial metadata update"; + } Optional metaUnreliableEnd = wait(bc->unreliableEndVersion().get()); if(metaUnreliableEnd.orDefault(0) < expireEndVersion) { Void _ = wait(bc->unreliableEndVersion().set(expireEndVersion)); } + if(progress != nullptr) { + progress->step = "Deleting files"; + progress->total = toDelete.size(); + progress->done = 0; + } + // Delete files, but limit parallelism because the file list could use a lot of memory and the corresponding // delete actor states would use even more if they all existed at the same time. 
state std::list> deleteFutures; @@ -868,10 +893,17 @@ public: while(deleteFutures.size() > targetFuturesSize) { Void _ = wait(deleteFutures.front()); + if(progress != nullptr) { + ++progress->done; + } deleteFutures.pop_front(); } } + if(progress != nullptr) { + progress->step = "Final metadata update"; + progress->total = 0; + } // Update the expiredEndVersion metadata to indicate that everything prior to that version has been // successfully deleted if the current version is lower or missing Optional metaExpiredEnd = wait(bc->expiredEndVersion().get()); @@ -883,8 +915,8 @@ public: } // Delete all data up to (but not including endVersion) - Future expireData(Version expireEndVersion, bool force, Version restorableBeginVersion) { - return expireData_impl(Reference::addRef(this), expireEndVersion, force, restorableBeginVersion); + Future expireData(Version expireEndVersion, bool force, ExpireProgress *progress, Version restorableBeginVersion) { + return expireData_impl(Reference::addRef(this), expireEndVersion, force, progress, restorableBeginVersion); } ACTOR static Future> getRestoreSet_impl(Reference bc, Version targetVersion) { diff --git a/fdbclient/BackupContainer.h b/fdbclient/BackupContainer.h index f74b2c20a2..dc55ea2bed 100644 --- a/fdbclient/BackupContainer.h +++ b/fdbclient/BackupContainer.h @@ -174,13 +174,19 @@ public: // Open a file for read by name virtual Future> readFile(std::string name) = 0; + struct ExpireProgress { + std::string step; + int total; + int done; + std::string toString() const; + }; // Delete backup files which do not contain any data at or after (more recent than) expireEndVersion. // If force is false, then nothing will be deleted unless there is a restorable snapshot which // - begins at or after expireEndVersion // - ends at or before restorableBeginVersion // If force is true, data is deleted unconditionally which could leave the backup in an unusable state. This is not recommended. // Returns true if expiration was done. 
- virtual Future expireData(Version expireEndVersion, bool force = false, Version restorableBeginVersion = std::numeric_limits::max()) = 0; + virtual Future expireData(Version expireEndVersion, bool force = false, ExpireProgress *progress = nullptr, Version restorableBeginVersion = std::numeric_limits::max()) = 0; // Delete entire container. During the process, if pNumDeleted is not null it will be // updated with the count of deleted files so that progress can be seen. From 567a7bd58ad234c7d2137d926de140262da0cce5 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Thu, 20 Dec 2018 01:24:31 -0800 Subject: [PATCH 32/55] Print a final deleted object count at the end of backup deletion. --- fdbbackup/backup.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 7826ae2b50..29bdfe2a18 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -1917,7 +1917,7 @@ ACTOR Future deleteBackupContainer(const char *name, std::string destinati } } } - printf("\n"); + printf("\r%d objects deleted\n", numDeleted); } catch (Error& e) { if(e.code() == error_code_actor_cancelled) From f64d5321e9507a54845d8e257b7051186c22e41c Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Thu, 20 Dec 2018 18:05:23 -0800 Subject: [PATCH 33/55] Backup describe, expire, and delete will now clearly indicate when there is no backup at the given URL. 
--- fdbbackup/backup.actor.cpp | 4 ++-- fdbclient/BackupContainer.actor.cpp | 23 +++++++++++++++++++++++ fdbclient/BackupContainer.h | 1 + flow/Platform.cpp | 13 +++++++++++++ flow/Platform.h | 3 +++ flow/error_definitions.h | 1 + 6 files changed, 43 insertions(+), 2 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 29bdfe2a18..f6c73fcbb2 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -1901,12 +1901,11 @@ ACTOR Future deleteBackupContainer(const char *name, std::string destinati state Future done = c->deleteContainer(&numDeleted); state int lastUpdate = -1; - printf("Deleting data...\n"); + printf("Deleting %s...\n", destinationContainer.c_str()); loop { choose { when ( Void _ = wait(done) ) { - printf("The entire container has been deleted.\n"); break; } when ( Void _ = wait(delay(5)) ) { @@ -1918,6 +1917,7 @@ ACTOR Future deleteBackupContainer(const char *name, std::string destinati } } printf("\r%d objects deleted\n", numDeleted); + printf("The entire container has been deleted.\n"); } catch (Error& e) { if(e.code() == error_code_actor_cancelled) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 23cce6a28e..a83292039f 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -200,6 +200,7 @@ public: // Create the container virtual Future create() = 0; + virtual Future exists() = 0; // Get a list of fileNames and their sizes in the container under the given path // Although not required, an implementation can avoid traversing unwanted subfolders @@ -550,6 +551,12 @@ public: .detail("URL", bc->getURL()) .detail("LogStartVersionOverride", logStartVersionOverride); + bool e = wait(bc->exists()); + if(!e) { + TraceEvent(SevWarnAlways, "BackupContainerDoesNotExist").detail("URL", bc->getURL()); + throw backup_does_not_exist(); + } + // If logStartVersion is relative, then first do a recursive call without it to find the max 
log version // from which to resolve the relative version. // This could be handled more efficiently without recursion but it's tricky, this will do for now. @@ -1115,6 +1122,11 @@ public: return Void(); } + // The container exists if the folder it resides in exists + Future exists() { + return directoryExists(m_path); + } + Future> readFile(std::string path) { int flags = IAsyncFile::OPEN_NO_AIO | IAsyncFile::OPEN_READONLY | IAsyncFile::OPEN_UNCACHED; // Simulation does not properly handle opening the same file from multiple machines using a shared filesystem, @@ -1361,7 +1373,18 @@ public: return create_impl(Reference::addRef(this)); } + // The container exists if the index entry in the blob bucket exists + Future exists() { + return m_bstore->objectExists(m_bucket, indexEntry()); + } + ACTOR static Future deleteContainer_impl(Reference bc, int *pNumDeleted) { + bool e = wait(bc->exists()); + if(!e) { + TraceEvent(SevWarnAlways, "BackupContainerDoesNotExist").detail("URL", bc->getURL()); + throw backup_does_not_exist(); + } + // First delete everything under the data prefix in the bucket Void _ = wait(bc->m_bstore->deleteRecursively(bc->m_bucket, bc->dataPath(""), pNumDeleted)); diff --git a/fdbclient/BackupContainer.h b/fdbclient/BackupContainer.h index dc55ea2bed..9154992ec8 100644 --- a/fdbclient/BackupContainer.h +++ b/fdbclient/BackupContainer.h @@ -162,6 +162,7 @@ public: // Create the container virtual Future create() = 0; + virtual Future exists() = 0; // Open a log file or range file for writing virtual Future> writeLogFile(Version beginVersion, Version endVersion, int blockSize) = 0; diff --git a/flow/Platform.cpp b/flow/Platform.cpp index 8b186d5a62..d612eeb174 100644 --- a/flow/Platform.cpp +++ b/flow/Platform.cpp @@ -2062,6 +2062,19 @@ bool fileExists(std::string const& filename) { return true; } +bool directoryExists(std::string const& path) { +#ifdef _WIN32 + DWORD bits = ::GetFileAttributes(path.c_str()); + return bits != INVALID_FILE_ATTRIBUTES 
&& (bits & FILE_ATTRIBUTE_DIRECTORY); +#else + DIR *d = opendir(path.c_str()); + if(d == nullptr) + return false; + closedir(d); + return true; +#endif +} + int64_t fileSize(std::string const& filename) { #ifdef _WIN32 struct _stati64 file_status; diff --git a/flow/Platform.h b/flow/Platform.h index dc998ec4d9..a946a0a8ed 100644 --- a/flow/Platform.h +++ b/flow/Platform.h @@ -285,6 +285,9 @@ void threadYield(); // Attempt to yield to other processes or threads // Returns true iff the file exists bool fileExists(std::string const& filename); +// Returns true iff the directory exists +bool directoryExists(std::string const& path); + // Returns size of file in bytes int64_t fileSize(std::string const& filename); diff --git a/flow/error_definitions.h b/flow/error_definitions.h index c0f6b0445a..d6a5acdcb3 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -177,6 +177,7 @@ ERROR( backup_invalid_info, 2315, "Backup Container URL invalid") ERROR( backup_cannot_expire, 2316, "Cannot expire requested data from backup without violating minimum restorability") ERROR( backup_auth_missing, 2317, "Cannot find authentication details (such as a password or secret key) for the specified Backup Container URL") ERROR( backup_auth_unreadable, 2318, "Cannot read or parse one or more sources of authentication information for Backup Container URLs") +ERROR( backup_does_not_exist, 2319, "Backup does not exist") ERROR( restore_invalid_version, 2361, "Invalid restore version") ERROR( restore_corrupted_data, 2362, "Corrupted backup data") ERROR( restore_missing_data, 2363, "Missing backup data") From 928876be81d9468aa7b8e9f38cec5455c0ff498b Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Fri, 21 Dec 2018 22:42:29 -0800 Subject: [PATCH 34/55] Backup dumpFileList() can now take a version range. 
--- fdbclient/BackupContainer.actor.cpp | 38 ++++++++++++++++++++--------- fdbclient/BackupContainer.h | 6 +++-- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index a83292039f..dc7dc1734e 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -55,6 +55,18 @@ std::string IBackupContainer::ExpireProgress::toString() const { return s; } +void BackupFileList::toStream(FILE *fout) const { + for(const RangeFile &f : ranges) { + fprintf(fout, "range %lld %s\n", f.fileSize, f.fileName.c_str()); + } + for(const LogFile &f : logs) { + fprintf(fout, "log %lld %s\n", f.fileSize, f.fileName.c_str()); + } + for(const KeyspaceSnapshotFile &f : snapshots) { + fprintf(fout, "snapshotManifest %lld %s\n", f.totalSize, f.fileName.c_str()); + } +} + std::string formatTime(int64_t t) { time_t curTime = (time_t)t; char buffer[128]; @@ -502,13 +514,13 @@ public: }); } - // List snapshots which have been fully written, in sorted beginVersion order. 
- Future> listKeyspaceSnapshots() { + // List snapshots which have been fully written, in sorted beginVersion order, which start before end and finish on or after begin + Future> listKeyspaceSnapshots(Version begin = 0, Version end = std::numeric_limits::max()) { return map(listFiles("snapshots/"), [=](const FilesAndSizesT &files) { std::vector results; KeyspaceSnapshotFile sf; for(auto &f : files) { - if(pathToKeyspaceSnapshotFile(sf, f.first)) + if(pathToKeyspaceSnapshotFile(sf, f.first) && sf.beginVersion < end && sf.endVersion >= begin) results.push_back(sf); } std::sort(results.begin(), results.end()); @@ -516,16 +528,18 @@ public: }); } - ACTOR static Future dumpFileList_impl(Reference bc) { - state Future> fRanges = bc->listRangeFiles(0, std::numeric_limits::max()); - state Future> fSnapshots = bc->listKeyspaceSnapshots(); - state Future> fLogs = bc->listLogFiles(0, std::numeric_limits::max()); + ACTOR static Future dumpFileList_impl(Reference bc, Version begin, Version end) { + state Future> fRanges = bc->listRangeFiles(begin, end); + state Future> fSnapshots = bc->listKeyspaceSnapshots(begin, end); + state Future> fLogs = bc->listLogFiles(begin, end); + Void _ = wait(success(fRanges) && success(fSnapshots) && success(fLogs)); - return FullBackupListing({fRanges.get(), fLogs.get(), fSnapshots.get()}); + + return BackupFileList({fRanges.get(), fLogs.get(), fSnapshots.get()}); } - Future dumpFileList() { - return dumpFileList_impl(Reference::addRef(this)); + Future dumpFileList(Version begin, Version end) { + return dumpFileList_impl(Reference::addRef(this), begin, end); } static Version resolveRelativeVersion(Optional max, Version v, const char *name, Error e) { @@ -1723,7 +1737,7 @@ ACTOR Future testBackupContainer(std::string url) { Void _ = wait(waitForAll(writes)); - state FullBackupListing listing = wait(c->dumpFileList()); + state BackupFileList listing = wait(c->dumpFileList()); ASSERT(listing.ranges.size() == nRangeFiles); ASSERT(listing.logs.size() 
== logs.size()); ASSERT(listing.snapshots.size() == snapshots.size()); @@ -1769,7 +1783,7 @@ ACTOR Future testBackupContainer(std::string url) { ASSERT(d.snapshots.size() == 0); ASSERT(!d.minLogBegin.present()); - FullBackupListing empty = wait(c->dumpFileList()); + BackupFileList empty = wait(c->dumpFileList()); ASSERT(empty.ranges.size() == 0); ASSERT(empty.logs.size() == 0); ASSERT(empty.snapshots.size() == 0); diff --git a/fdbclient/BackupContainer.h b/fdbclient/BackupContainer.h index 9154992ec8..dd350330d4 100644 --- a/fdbclient/BackupContainer.h +++ b/fdbclient/BackupContainer.h @@ -96,10 +96,12 @@ struct KeyspaceSnapshotFile { } }; -struct FullBackupListing { +struct BackupFileList { std::vector ranges; std::vector logs; std::vector snapshots; + + void toStream(FILE *fout) const; }; // The byte counts here only include usable log files and byte counts from kvrange manifests @@ -200,7 +202,7 @@ public: // be after deleting all data prior to logStartVersionOverride. virtual Future describeBackup(bool deepScan = false, Version logStartVersionOverride = invalidVersion) = 0; - virtual Future dumpFileList() = 0; + virtual Future dumpFileList(Version begin = 0, Version end = std::numeric_limits::max()) = 0; // Get exactly the files necessary to restore to targetVersion. Returns non-present if // restore to given version is not possible. From 19c0ccd4c032dee6d29d804cc92d7b7418ec0323 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Thu, 3 Jan 2019 02:35:31 -0800 Subject: [PATCH 35/55] Added fdbbackup dump option to list files and sizes in a backup for a given version range. 
--- fdbbackup/backup.actor.cpp | 97 +++++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index f6c73fcbb2..c10369ad70 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -76,7 +76,7 @@ enum enumProgramExe { }; enum enumBackupType { - BACKUP_UNDEFINED=0, BACKUP_START, BACKUP_STATUS, BACKUP_ABORT, BACKUP_WAIT, BACKUP_DISCONTINUE, BACKUP_PAUSE, BACKUP_RESUME, BACKUP_EXPIRE, BACKUP_DELETE, BACKUP_DESCRIBE, BACKUP_LIST + BACKUP_UNDEFINED=0, BACKUP_START, BACKUP_STATUS, BACKUP_ABORT, BACKUP_WAIT, BACKUP_DISCONTINUE, BACKUP_PAUSE, BACKUP_RESUME, BACKUP_EXPIRE, BACKUP_DELETE, BACKUP_DESCRIBE, BACKUP_LIST, BACKUP_DUMP }; enum enumDBType { @@ -94,6 +94,7 @@ enum { OPT_EXPIRE_BEFORE_VERSION, OPT_EXPIRE_BEFORE_DATETIME, OPT_EXPIRE_DELETE_BEFORE_DAYS, OPT_EXPIRE_RESTORABLE_AFTER_VERSION, OPT_EXPIRE_RESTORABLE_AFTER_DATETIME, OPT_EXPIRE_MIN_RESTORABLE_DAYS, OPT_BASEURL, OPT_BLOB_CREDENTIALS, OPT_DESCRIBE_DEEP, OPT_DESCRIBE_TIMESTAMPS, + OPT_DUMP_BEGIN, OPT_DUMP_END, // Backup and Restore constants OPT_TAGNAME, OPT_BACKUPKEYS, OPT_WAITFORDONE, @@ -397,6 +398,35 @@ CSimpleOpt::SOption g_rgBackupDescribeOptions[] = { SO_END_OF_OPTIONS }; +CSimpleOpt::SOption g_rgBackupDumpOptions[] = { +#ifdef _WIN32 + { OPT_PARENTPID, "--parentpid", SO_REQ_SEP }, +#endif + { OPT_CLUSTERFILE, "-C", SO_REQ_SEP }, + { OPT_CLUSTERFILE, "--cluster_file", SO_REQ_SEP }, + { OPT_DESTCONTAINER, "-d", SO_REQ_SEP }, + { OPT_DESTCONTAINER, "--destcontainer", SO_REQ_SEP }, + { OPT_TRACE, "--log", SO_NONE }, + { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_QUIET, "-q", SO_NONE }, + { OPT_QUIET, "--quiet", SO_NONE }, + { OPT_VERSION, "-v", SO_NONE }, + { OPT_VERSION, "--version", SO_NONE }, + { OPT_CRASHONERROR, "--crash", SO_NONE }, + { OPT_MEMLIMIT, "-m", SO_REQ_SEP }, + { OPT_MEMLIMIT, "--memory", SO_REQ_SEP }, + { OPT_HELP, "-?", SO_NONE }, + { OPT_HELP, "-h", SO_NONE }, + { 
OPT_HELP, "--help", SO_NONE }, + { OPT_DEVHELP, "--dev-help", SO_NONE }, + { OPT_BLOB_CREDENTIALS, "--blob_credentials", SO_REQ_SEP }, + { OPT_KNOB, "--knob_", SO_REQ_SEP }, + { OPT_DUMP_BEGIN, "--begin", SO_REQ_SEP }, + { OPT_DUMP_END, "--end", SO_REQ_SEP }, + + SO_END_OF_OPTIONS +}; + CSimpleOpt::SOption g_rgBackupListOptions[] = { #ifdef _WIN32 { OPT_PARENTPID, "--parentpid", SO_REQ_SEP }, @@ -978,6 +1008,7 @@ enumBackupType getBackupType(std::string backupType) values["delete"] = BACKUP_DELETE; values["describe"] = BACKUP_DESCRIBE; values["list"] = BACKUP_LIST; + values["dump"] = BACKUP_DUMP; } auto i = values.find(backupType); @@ -1832,6 +1863,33 @@ Reference openBackupContainer(const char *name, std::string de return c; } +ACTOR Future dumpBackupData(const char *name, std::string destinationContainer, Version beginVersion, Version endVersion) { + state Reference c = openBackupContainer(name, destinationContainer); + + if(beginVersion < 0 || endVersion < 0) { + BackupDescription desc = wait(c->describeBackup()); + + if(!desc.maxLogEnd.present()) { + fprintf(stderr, "ERROR: Backup must have log data in order to use relative begin/end versions.\n"); + throw backup_invalid_info(); + } + + if(beginVersion < 0) { + beginVersion += desc.maxLogEnd.get(); + } + + if(endVersion < 0) { + endVersion += desc.maxLogEnd.get(); + } + } + + printf("Scanning version range %lld to %lld\n", beginVersion, endVersion); + BackupFileList files = wait(c->dumpFileList(beginVersion, endVersion)); + files.toStream(stdout); + + return Void(); +} + ACTOR Future expireBackupData(const char *name, std::string destinationContainer, Version endVersion, std::string endDatetime, Database db, bool force, Version restorableAfterVersion, std::string restorableAfterDatetime) { if (!endDatetime.empty()) { Version v = wait( timeKeeperVersionFromDatetime(endDatetime, db) ); @@ -2115,6 +2173,26 @@ static void addKeyRange(std::string optionValue, StandaloneCORE_VERSIONSPERSECOND * 24 * 3600 * -days; + } 
+ + Version ver; + if(sscanf(str, "%lld", &ver) != 1) { + fprintf(stderr, "Could not parse version: %s\n", str); + flushAndExit(FDB_EXIT_ERROR); + } + return ver; +} + #ifdef ALLOC_INSTRUMENTATION extern uint8_t *g_extra_memory; #endif @@ -2193,6 +2271,9 @@ int main(int argc, char* argv[]) { case BACKUP_DESCRIBE: args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupDescribeOptions, SO_O_EXACT); break; + case BACKUP_DUMP: + args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupDumpOptions, SO_O_EXACT); + break; case BACKUP_LIST: args = new CSimpleOpt(argc - 1, &argv[1], g_rgBackupListOptions, SO_O_EXACT); break; @@ -2330,6 +2411,8 @@ int main(int argc, char* argv[]) { uint64_t memLimit = 8LL << 30; Optional ti; std::vector blobCredentials; + Version dumpBegin = 0; + Version dumpEnd = std::numeric_limits::max(); if( argc == 1 ) { printUsage(programExe, false); @@ -2587,6 +2670,12 @@ int main(int argc, char* argv[]) { case OPT_BLOB_CREDENTIALS: blobCredentials.push_back(args->OptionArg()); break; + case OPT_DUMP_BEGIN: + dumpBegin = parseVersion(args->OptionArg()); + break; + case OPT_DUMP_END: + dumpEnd = parseVersion(args->OptionArg()); + break; } } @@ -2908,11 +2997,17 @@ int main(int argc, char* argv[]) { // Only pass database optionDatabase Describe will lookup version timestamps if a cluster file was given, but quietly skip them if not. f = stopAfter( describeBackup(argv[0], destinationContainer, describeDeep, describeTimestamps ? 
Optional(db) : Optional()) ); break; + case BACKUP_LIST: initTraceFile(); f = stopAfter( listBackup(baseUrl) ); break; + case BACKUP_DUMP: + initTraceFile(); + f = stopAfter( dumpBackupData(argv[0], destinationContainer, dumpBegin, dumpEnd) ); + break; + case BACKUP_UNDEFINED: default: fprintf(stderr, "ERROR: Unsupported backup action %s\n", argv[1]); From 276ca82af40ba791b9731d35cd68c8d4193d981e Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Fri, 4 Jan 2019 00:15:51 -0800 Subject: [PATCH 36/55] Added --loggroup option to all fdbbackup modes. --- fdbbackup/backup.actor.cpp | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index c10369ad70..7b5da88cc6 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -120,7 +120,6 @@ CSimpleOpt::SOption g_rgAgentOptions[] = { #endif { OPT_CLUSTERFILE, "-C", SO_REQ_SEP }, { OPT_CLUSTERFILE, "--cluster_file", SO_REQ_SEP }, - { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_KNOB, "--knob_", SO_REQ_SEP }, { OPT_VERSION, "--version", SO_NONE }, { OPT_VERSION, "-v", SO_NONE }, @@ -128,6 +127,7 @@ CSimpleOpt::SOption g_rgAgentOptions[] = { { OPT_QUIET, "--quiet", SO_NONE }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_CRASHONERROR, "--crash", SO_NONE }, { OPT_LOCALITY, "--locality_", SO_REQ_SEP }, { OPT_MEMLIMIT, "-m", SO_REQ_SEP }, @@ -163,6 +163,7 @@ CSimpleOpt::SOption g_rgBackupStartOptions[] = { { OPT_DRYRUN, "--dryrun", SO_NONE }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "--version", SO_NONE }, @@ -192,6 +193,7 @@ CSimpleOpt::SOption g_rgBackupStatusOptions[] = { { OPT_TAGNAME, "--tagname", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { 
OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_VERSION, "--version", SO_NONE }, { OPT_VERSION, "-v", SO_NONE }, { OPT_QUIET, "-q", SO_NONE }, @@ -217,6 +219,7 @@ CSimpleOpt::SOption g_rgBackupAbortOptions[] = { { OPT_TAGNAME, "--tagname", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "--version", SO_NONE }, @@ -244,6 +247,7 @@ CSimpleOpt::SOption g_rgBackupDiscontinueOptions[] = { { OPT_WAITFORDONE, "--waitfordone", SO_NONE }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "--version", SO_NONE }, @@ -271,6 +275,7 @@ CSimpleOpt::SOption g_rgBackupWaitOptions[] = { { OPT_NOSTOPWHENDONE, "--no-stop-when-done",SO_NONE }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "--version", SO_NONE }, @@ -294,6 +299,7 @@ CSimpleOpt::SOption g_rgBackupPauseOptions[] = { { OPT_CLUSTERFILE, "--cluster_file", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "--version", SO_NONE }, @@ -319,6 +325,7 @@ CSimpleOpt::SOption g_rgBackupExpireOptions[] = { { OPT_DESTCONTAINER, "--destcontainer", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "-v", SO_NONE }, @@ -352,6 +359,7 @@ CSimpleOpt::SOption 
g_rgBackupDeleteOptions[] = { { OPT_DESTCONTAINER, "--destcontainer", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "-v", SO_NONE }, @@ -379,6 +387,7 @@ CSimpleOpt::SOption g_rgBackupDescribeOptions[] = { { OPT_DESTCONTAINER, "--destcontainer", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "-v", SO_NONE }, @@ -408,6 +417,7 @@ CSimpleOpt::SOption g_rgBackupDumpOptions[] = { { OPT_DESTCONTAINER, "--destcontainer", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "-v", SO_NONE }, @@ -435,6 +445,7 @@ CSimpleOpt::SOption g_rgBackupListOptions[] = { { OPT_BASEURL, "--base_url", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "-v", SO_NONE }, @@ -472,6 +483,7 @@ CSimpleOpt::SOption g_rgRestoreOptions[] = { { OPT_DBVERSION, "-v", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_DRYRUN, "-n", SO_NONE }, @@ -505,6 +517,7 @@ CSimpleOpt::SOption g_rgDBAgentOptions[] = { { OPT_QUIET, "--quiet", SO_NONE }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_CRASHONERROR, "--crash", SO_NONE }, { OPT_LOCALITY, "--locality_", SO_REQ_SEP }, { 
OPT_MEMLIMIT, "-m", SO_REQ_SEP }, @@ -531,6 +544,7 @@ CSimpleOpt::SOption g_rgDBStartOptions[] = { { OPT_BACKUPKEYS, "--keys", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "--version", SO_NONE }, @@ -560,6 +574,7 @@ CSimpleOpt::SOption g_rgDBStatusOptions[] = { { OPT_TAGNAME, "--tagname", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_VERSION, "--version", SO_NONE }, { OPT_VERSION, "-v", SO_NONE }, { OPT_QUIET, "-q", SO_NONE }, @@ -587,6 +602,7 @@ CSimpleOpt::SOption g_rgDBSwitchOptions[] = { { OPT_TAGNAME, "--tagname", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "--version", SO_NONE }, @@ -615,6 +631,7 @@ CSimpleOpt::SOption g_rgDBAbortOptions[] = { { OPT_TAGNAME, "--tagname", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "--version", SO_NONE }, @@ -640,6 +657,7 @@ CSimpleOpt::SOption g_rgDBPauseOptions[] = { { OPT_DEST_CLUSTER, "--destination", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, { OPT_TRACE_DIR, "--logdir", SO_REQ_SEP }, + { OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP }, { OPT_QUIET, "-q", SO_NONE }, { OPT_QUIET, "--quiet", SO_NONE }, { OPT_VERSION, "--version", SO_NONE }, From 8d4d8ccba6368fca7f6b267f4bff7c1beb016c7d Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Fri, 4 Jan 2019 00:17:02 -0800 Subject: [PATCH 37/55] Stopped escaping some printable characters in urlEscape() method for normalizing HTTP resource strings. 
--- fdbclient/HTTP.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/HTTP.actor.cpp b/fdbclient/HTTP.actor.cpp index 0307185a4a..5278378da2 100644 --- a/fdbclient/HTTP.actor.cpp +++ b/fdbclient/HTTP.actor.cpp @@ -31,7 +31,7 @@ namespace HTTP { o.reserve(s.size() * 3); char buf[4]; for(auto c : s) - if(std::isalnum(c)) + if(std::isalnum(c) || c == '?' || c == '/' || c == '-' || c == '_') o.append(&c, 1); else { sprintf(buf, "%%%.02X", c); From f05bb0eb9b9cd90260cdbf0516a80658adefc6a7 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Fri, 4 Jan 2019 00:35:58 -0800 Subject: [PATCH 38/55] Stop escaping dot in HTTP resource string normalization. --- fdbclient/HTTP.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/HTTP.actor.cpp b/fdbclient/HTTP.actor.cpp index 5278378da2..7b763abe4a 100644 --- a/fdbclient/HTTP.actor.cpp +++ b/fdbclient/HTTP.actor.cpp @@ -31,7 +31,7 @@ namespace HTTP { o.reserve(s.size() * 3); char buf[4]; for(auto c : s) - if(std::isalnum(c) || c == '?' || c == '/' || c == '-' || c == '_') + if(std::isalnum(c) || c == '?' 
|| c == '/' || c == '-' || c == '_' || c == '.') o.append(&c, 1); else { sprintf(buf, "%%%.02X", c); From 57293a2db09cfda205cbe5929ec0f8df4a4a5916 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Fri, 4 Jan 2019 10:32:31 -0800 Subject: [PATCH 39/55] byte sample recovery did not use limits for its range reads, leading to slow tasks --- fdbrpc/TLSConnection.actor.cpp | 8 +-- fdbserver/DataDistribution.actor.cpp | 2 +- fdbserver/Knobs.cpp | 2 + fdbserver/Knobs.h | 2 + fdbserver/storageserver.actor.cpp | 91 ++++++++++++++++++---------- 5 files changed, 69 insertions(+), 36 deletions(-) diff --git a/fdbrpc/TLSConnection.actor.cpp b/fdbrpc/TLSConnection.actor.cpp index 05ae1efb5b..7ad78fecc1 100644 --- a/fdbrpc/TLSConnection.actor.cpp +++ b/fdbrpc/TLSConnection.actor.cpp @@ -47,10 +47,10 @@ static int send_func(void* ctx, const uint8_t* buf, int len) { int w = conn->conn->write( &sb ); return w; } catch ( Error& e ) { - TraceEvent("TLSConnectionSendError", conn->getDebugID()).error(e); + TraceEvent("TLSConnectionSendError", conn->getDebugID()).error(e).suppressFor(1.0); return -1; } catch ( ... ) { - TraceEvent("TLSConnectionSendError", conn->getDebugID()).error( unknown_error() ); + TraceEvent("TLSConnectionSendError", conn->getDebugID()).error( unknown_error() ).suppressFor(1.0); return -1; } } @@ -63,10 +63,10 @@ static int recv_func(void* ctx, uint8_t* buf, int len) { int r = conn->conn->read( buf, buf + len ); return r; } catch ( Error& e ) { - TraceEvent("TLSConnectionRecvError", conn->getDebugID()).error(e); + TraceEvent("TLSConnectionRecvError", conn->getDebugID()).error(e).suppressFor(1.0); return -1; } catch ( ... 
) { - TraceEvent("TLSConnectionRecvError", conn->getDebugID()).error( unknown_error() ); + TraceEvent("TLSConnectionRecvError", conn->getDebugID()).error( unknown_error() ).suppressFor(1.0); return -1; } } diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index f5f8a8226b..647c61c3cb 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -1956,7 +1956,7 @@ ACTOR Future storageServerTracker( } } } catch( Error &e ) { - if (e.code() != error_code_actor_cancelled) + if (e.code() != error_code_actor_cancelled && errorOut.canBeSet()) errorOut.sendError(e); throw; } diff --git a/fdbserver/Knobs.cpp b/fdbserver/Knobs.cpp index e0fa6538b5..a924bc9057 100644 --- a/fdbserver/Knobs.cpp +++ b/fdbserver/Knobs.cpp @@ -373,6 +373,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) { init( MAX_STORAGE_SERVER_WATCH_BYTES, 100e6 ); if( randomize && BUGGIFY ) MAX_STORAGE_SERVER_WATCH_BYTES = 10e3; init( MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE, 1e9 ); if( randomize && BUGGIFY ) MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE = 1e3; init( LONG_BYTE_SAMPLE_RECOVERY_DELAY, 60.0 ); + init( BYTE_SAMPLE_LOAD_PARALLELISM, 32 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_PARALLELISM = 1; + init( BYTE_SAMPLE_LOAD_DELAY, 0.0 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_DELAY = 0.1; //Wait Failure init( BUGGIFY_OUTSTANDING_WAIT_FAILURE_REQUESTS, 2 ); diff --git a/fdbserver/Knobs.h b/fdbserver/Knobs.h index 8bfa47ddb2..f624c16f0b 100644 --- a/fdbserver/Knobs.h +++ b/fdbserver/Knobs.h @@ -311,6 +311,8 @@ public: int MAX_STORAGE_SERVER_WATCH_BYTES; int MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE; double LONG_BYTE_SAMPLE_RECOVERY_DELAY; + int BYTE_SAMPLE_LOAD_PARALLELISM; + double BYTE_SAMPLE_LOAD_DELAY; //Wait Failure int BUGGIFY_OUTSTANDING_WAIT_FAILURE_REQUESTS; diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 4b903f4065..ec3f0e8b76 100644 --- a/fdbserver/storageserver.actor.cpp +++ 
b/fdbserver/storageserver.actor.cpp @@ -2814,48 +2814,76 @@ void StorageServerDisk::changeLogProtocol(Version version, uint64_t protocol) { data->addMutationToMutationLogOrStorage(version, MutationRef(MutationRef::SetValue, persistLogProtocol, BinaryWriter::toValue(protocol, Unversioned()))); } -ACTOR Future applyByteSampleResult( StorageServer* data, KeyRange range, Future>> result) { - Standalone> bs = wait( result ); - for( int j = 0; j < bs.size(); j++ ) { - KeyRef key = bs[j].key.removePrefix(persistByteSampleKeys.begin); - if(!data->byteSampleClears.rangeContaining(key).value()) { - data->metrics.byteSample.sample.insert( key, BinaryReader::fromStringRef(bs[j].value, Unversioned()), false ); +ACTOR Future applyByteSampleResult( StorageServer* data, IKeyValueStore* storage, Key begin, Key end, std::vector>>* results = NULL) { + state int totalFetches = 0; + state int totalKeys = 0; + state int totalBytes = 0; + loop { + Standalone> bs = wait( storage->readRange( KeyRangeRef(begin, end), SERVER_KNOBS->STORAGE_LIMIT_BYTES, SERVER_KNOBS->STORAGE_LIMIT_BYTES ) ); + if(results) results->push_back(bs); + int rangeSize = bs.expectedSize(); + totalFetches++; + totalKeys += bs.size(); + totalBytes += rangeSize; + for( int j = 0; j < bs.size(); j++ ) { + KeyRef key = bs[j].key.removePrefix(persistByteSampleKeys.begin); + if(!data->byteSampleClears.rangeContaining(key).value()) { + data->metrics.byteSample.sample.insert( key, BinaryReader::fromStringRef(bs[j].value, Unversioned()), false ); + } + } + if( rangeSize >= SERVER_KNOBS->STORAGE_LIMIT_BYTES ) { + Key nextBegin = keyAfter(bs.back().key); + data->byteSampleClears.insert(KeyRangeRef(begin, nextBegin).removePrefix(persistByteSampleKeys.begin), true); + data->byteSampleClearsTooLarge.set(data->byteSampleClears.size() > SERVER_KNOBS->MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE); + begin = nextBegin; + if(begin == end) { + break; + } + } else { + 
data->byteSampleClears.insert(KeyRangeRef(begin.removePrefix(persistByteSampleKeys.begin), end == persistByteSampleKeys.end ? LiteralStringRef("\xff\xff\xff") : end.removePrefix(persistByteSampleKeys.begin)), true); + data->byteSampleClearsTooLarge.set(data->byteSampleClears.size() > SERVER_KNOBS->MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE); + break; + } + + if(!results) { + Void _ = wait(delay(SERVER_KNOBS->BYTE_SAMPLE_LOAD_DELAY)); } } - data->byteSampleClears.insert(range, true); - data->byteSampleClearsTooLarge.set(data->byteSampleClears.size() > SERVER_KNOBS->MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE); - + TraceEvent("RecoveredByteSampleRange", data->thisServerID).detail("Begin", printable(begin)).detail("End", printable(end)).detail("Fetches", totalFetches).detail("Keys", totalKeys).detail("ReadBytes", totalBytes); return Void(); } -ACTOR Future restoreByteSample(StorageServer* data, IKeyValueStore* storage, Standalone> bsSample) { +ACTOR Future restoreByteSample(StorageServer* data, IKeyValueStore* storage, Promise byteSampleSampleRecovered) { + state std::vector>> byteSampleSample; + Void _ = wait( applyByteSampleResult(data, storage, persistByteSampleSampleKeys.begin, persistByteSampleSampleKeys.end, &byteSampleSample) ); + byteSampleSampleRecovered.send(Void()); Void _ = wait( delay( BUGGIFY ? 
g_random->random01() * 2.0 : 0.0001 ) ); - TraceEvent("RecoveredByteSampleSample", data->thisServerID).detail("Keys", bsSample.size()).detail("ReadBytes", bsSample.expectedSize()); - size_t bytes_per_fetch = 0; // Since the expected size also includes (as of now) the space overhead of the container, we calculate our own number here - for( int i = 0; i < bsSample.size(); i++ ) - bytes_per_fetch += BinaryReader::fromStringRef(bsSample[i].value, Unversioned()); - bytes_per_fetch /= 32; + for( auto& it : byteSampleSample ) { + for( auto& kv : it ) { + bytes_per_fetch += BinaryReader::fromStringRef(kv.value, Unversioned()); + } + } + bytes_per_fetch = (bytes_per_fetch/SERVER_KNOBS->BYTE_SAMPLE_LOAD_PARALLELISM) + 1; state std::vector> sampleRanges; int accumulatedSize = 0; - std::string prefix = PERSIST_PREFIX "BS/"; - Key lastStart = LiteralStringRef( PERSIST_PREFIX "BS/" ); // make sure the first range starts at the absolute beginning of the byte sample - for( auto it = bsSample.begin(); it != bsSample.end(); ++it ) { - if( accumulatedSize >= bytes_per_fetch ) { - accumulatedSize = 0; - Key realKey = it->key.removePrefix( prefix ); - KeyRange sampleRange = KeyRangeRef( lastStart, realKey ); - sampleRanges.push_back( applyByteSampleResult(data, sampleRange.removePrefix(persistByteSampleKeys.begin), storage->readRange( sampleRange )) ); - lastStart = realKey; + Key lastStart = persistByteSampleKeys.begin; // make sure the first range starts at the absolute beginning of the byte sample + for( auto& it : byteSampleSample ) { + for( auto& kv : it ) { + if( accumulatedSize >= bytes_per_fetch ) { + accumulatedSize = 0; + Key realKey = kv.key.removePrefix( persistByteSampleKeys.begin ); + sampleRanges.push_back( applyByteSampleResult(data, storage, lastStart, realKey) ); + lastStart = realKey; + } + accumulatedSize += BinaryReader::fromStringRef(kv.value, Unversioned()); } - accumulatedSize += BinaryReader::fromStringRef(it->value, Unversioned()); } // make sure that the last 
range goes all the way to the end of the byte sample - KeyRange sampleRange = KeyRangeRef( lastStart, LiteralStringRef( PERSIST_PREFIX "BS0" )); - sampleRanges.push_back( applyByteSampleResult(data, KeyRangeRef(lastStart.removePrefix(persistByteSampleKeys.begin), LiteralStringRef("\xff\xff\xff")), storage->readRange( sampleRange )) ); + sampleRanges.push_back( applyByteSampleResult(data, storage, lastStart, persistByteSampleKeys.end) ); Void _ = wait( waitForAll( sampleRanges ) ); TraceEvent("RecoveredByteSampleChunkedRead", data->thisServerID).detail("Ranges",sampleRanges.size()); @@ -2873,11 +2901,14 @@ ACTOR Future restoreDurableState( StorageServer* data, IKeyValueStore* sto state Future> fLogProtocol = storage->readValue(persistLogProtocol); state Future>> fShardAssigned = storage->readRange(persistShardAssignedKeys); state Future>> fShardAvailable = storage->readRange(persistShardAvailableKeys); - state Future>> fByteSampleSample = storage->readRange(persistByteSampleSampleKeys); + + state Promise byteSampleSampleRecovered; + data->byteSampleRecovery = restoreByteSample(data, storage, byteSampleSampleRecovered); TraceEvent("ReadingDurableState", data->thisServerID); Void _ = wait( waitForAll( (vector>>(), fFormat, fID, fVersion, fLogProtocol) ) ); - Void _ = wait( waitForAll( (vector>>>(), fShardAssigned, fShardAvailable, fByteSampleSample) ) ); + Void _ = wait( waitForAll( (vector>>>(), fShardAssigned, fShardAvailable) ) ); + Void _ = wait( byteSampleSampleRecovered.getFuture() ); TraceEvent("RestoringDurableState", data->thisServerID); if (!fFormat.get().present()) { @@ -2932,8 +2963,6 @@ ACTOR Future restoreDurableState( StorageServer* data, IKeyValueStore* sto Void _ = wait(yield()); } - Void _ = wait( applyByteSampleResult(data, persistByteSampleSampleKeys.removePrefix(persistByteSampleKeys.begin), fByteSampleSample) ); - data->byteSampleRecovery = restoreByteSample(data, storage, fByteSampleSample.get()); Void _ = wait( delay( 0.0001 ) ); From 
339e15563e4b9fd9573e06dd5a2269fee0f6a077 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Fri, 4 Jan 2019 18:34:16 -0800 Subject: [PATCH 40/55] Bug fix in blobstore request loop where zero-content requests (i.e. not uploads) could cause mismatches between requests and responses. Added HTTPRequest debug event. --- fdbclient/BlobStore.actor.cpp | 2 +- fdbclient/HTTP.actor.cpp | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fdbclient/BlobStore.actor.cpp b/fdbclient/BlobStore.actor.cpp index 32d0864f70..d222491695 100644 --- a/fdbclient/BlobStore.actor.cpp +++ b/fdbclient/BlobStore.actor.cpp @@ -511,8 +511,8 @@ ACTOR Future> doRequest_impl(Reference frconn = bstore->connect(); // Make a shallow copy of the queue by calling addref() on each buffer in the chain and then prepending that chain to contentCopy + contentCopy.discardAll(); if(pContent != nullptr) { - contentCopy.discardAll(); PacketBuffer *pFirst = pContent->getUnsent(); PacketBuffer *pLast = nullptr; for(PacketBuffer *p = pFirst; p != nullptr; p = p->nextPacketBuffer()) { diff --git a/fdbclient/HTTP.actor.cpp b/fdbclient/HTTP.actor.cpp index 7b763abe4a..3bfca52b6e 100644 --- a/fdbclient/HTTP.actor.cpp +++ b/fdbclient/HTTP.actor.cpp @@ -294,6 +294,8 @@ namespace HTTP { // and be destroyed by the caller // TODO: pSent is very hackish, do something better. 
ACTOR Future> doRequest(Reference conn, std::string verb, std::string resource, HTTP::Headers headers, UnsentPacketQueue *pContent, int contentLen, Reference sendRate, int64_t *pSent, Reference recvRate) { + state TraceEvent event(SevDebug, "HTTPRequest"); + state UnsentPacketQueue empty; if(pContent == NULL) pContent = ∅ @@ -301,6 +303,12 @@ namespace HTTP { state bool earlyResponse = false; state int total_sent = 0; + event.detail("DebugID", conn->getDebugID()); + event.detail("RemoteAddress", conn->getPeerAddress()); + event.detail("Verb", verb); + event.detail("Resource", resource); + event.detail("RequestContentLen", contentLen); + try { // Write headers to a packet buffer chain PacketBuffer *pFirst = new PacketBuffer(); @@ -347,8 +355,11 @@ namespace HTTP { } Void _ = wait(responseReading); + event.detail("ResponseCode", r->code); + event.detail("ResponseContentLen", r->contentLen); double elapsed = timer() - send_start; + event.detail("Elapsed", elapsed); if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0) printf("[%s] HTTP code=%d early=%d, time=%fs %s %s contentLen=%d [%d out, response content len %d]\n", conn->getDebugID().toString().c_str(), r->code, earlyResponse, elapsed, verb.c_str(), resource.c_str(), contentLen, total_sent, (int)r->contentLen); From 0ec216a1fa3b38ad24e970ddb175eeb086c0d770 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Mon, 7 Jan 2019 17:56:38 -0800 Subject: [PATCH 41/55] Added X-Request-ID header to HTTP requests and verification of matching ID in response, if present. 
--- fdbclient/HTTP.actor.cpp | 11 +++++++++++ flow/error_definitions.h | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fdbclient/HTTP.actor.cpp b/fdbclient/HTTP.actor.cpp index 3bfca52b6e..498dfc3e1e 100644 --- a/fdbclient/HTTP.actor.cpp +++ b/fdbclient/HTTP.actor.cpp @@ -310,6 +310,10 @@ namespace HTTP { event.detail("RequestContentLen", contentLen); try { + state std::string requestID = g_random->randomUniqueID().toString(); + event.detail("RequestIDSent", requestID); + headers["X-Request-ID"] = requestID; + // Write headers to a packet buffer chain PacketBuffer *pFirst = new PacketBuffer(); PacketBuffer *pLast = writeRequestHeader(verb, resource, headers, pFirst); @@ -358,6 +362,12 @@ namespace HTTP { event.detail("ResponseCode", r->code); event.detail("ResponseContentLen", r->contentLen); + auto iid = r->headers.find("X-Request-ID"); + if(iid != r->headers.end() && iid->second != requestID) { + event.detail("RequestIDReceived", iid->second); + throw http_bad_request_id(); + } + double elapsed = timer() - send_start; event.detail("Elapsed", elapsed); if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0) @@ -371,6 +381,7 @@ namespace HTTP { if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0) printf("[%s] HTTP *ERROR*=%s early=%d, time=%fs %s %s contentLen=%d [%d out]\n", conn->getDebugID().toString().c_str(), e.name(), earlyResponse, elapsed, verb.c_str(), resource.c_str(), contentLen, total_sent); + event.error(e); throw; } } diff --git a/flow/error_definitions.h b/flow/error_definitions.h index d6a5acdcb3..915265491a 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -101,7 +101,7 @@ ERROR( io_timeout, 1521, "A disk IO operation failed to complete in a timely man ERROR( file_corrupt, 1522, "A structurally corrupt data file was detected" ) ERROR( http_request_failed, 1523, "HTTP response code not received or indicated failure" ) ERROR( http_auth_failed, 1524, "HTTP request failed due to bad credentials" ) - +ERROR( http_bad_request_id, 1525, 
"HTTP response contained an unexpected X-Request-ID header" ) // 2xxx Attempt (presumably by a _client_) to do something illegal. If an error is known to // be internally caused, it should be 41xx From 5a6576c85af13cf6c180d0ef6b72f380bace0755 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Mon, 7 Jan 2019 18:15:36 -0800 Subject: [PATCH 42/55] updated documentation for 6.0.18 --- documentation/sphinx/source/downloads.rst | 24 +++++++++---------- documentation/sphinx/source/release-notes.rst | 16 +++++++++++++ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/documentation/sphinx/source/downloads.rst b/documentation/sphinx/source/downloads.rst index 611345df8d..f6c99427ea 100644 --- a/documentation/sphinx/source/downloads.rst +++ b/documentation/sphinx/source/downloads.rst @@ -10,38 +10,38 @@ macOS The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server. -* `FoundationDB-6.0.17.pkg `_ +* `FoundationDB-6.0.18.pkg `_ Ubuntu ------ The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x. -* `foundationdb-clients-6.0.17-1_amd64.deb `_ -* `foundationdb-server-6.0.17-1_amd64.deb `_ (depends on the clients package) +* `foundationdb-clients-6.0.18-1_amd64.deb `_ +* `foundationdb-server-6.0.18-1_amd64.deb `_ (depends on the clients package) RHEL/CentOS EL6 --------------- The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x. -* `foundationdb-clients-6.0.17-1.el6.x86_64.rpm `_ -* `foundationdb-server-6.0.17-1.el6.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-6.0.18-1.el6.x86_64.rpm `_ +* `foundationdb-server-6.0.18-1.el6.x86_64.rpm `_ (depends on the clients package) RHEL/CentOS EL7 --------------- The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x. 
-* `foundationdb-clients-6.0.17-1.el7.x86_64.rpm `_ -* `foundationdb-server-6.0.17-1.el7.x86_64.rpm `_ (depends on the clients package) +* `foundationdb-clients-6.0.18-1.el7.x86_64.rpm `_ +* `foundationdb-server-6.0.18-1.el7.x86_64.rpm `_ (depends on the clients package) Windows ------- The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server. -* `foundationdb-6.0.17-x64.msi `_ +* `foundationdb-6.0.18-x64.msi `_ API Language Bindings ===================== @@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package: -* `foundationdb-6.0.17.tar.gz `_ +* `foundationdb-6.0.18.tar.gz `_ Ruby 1.9.3/2.0.0+ ----------------- -* `fdb-6.0.17.gem `_ +* `fdb-6.0.18.gem `_ Java 8+ ------- -* `fdb-java-6.0.17.jar `_ -* `fdb-java-6.0.17-javadoc.jar `_ +* `fdb-java-6.0.18.jar `_ +* `fdb-java-6.0.18-javadoc.jar `_ Go 1.1+ ------- diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 94bf9872eb..6697136f95 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -2,6 +2,22 @@ Release Notes ############# +6.0.18 +====== + +Fixes +----- + +* Backup metadata could falsely indicate that the backup is not usable. `(PR #1007) `_ +* Backup expire and delete operations could fail to delete some files. `(PR #1007) `_ +* Restore could fail to apply some range files. `(PR #1007) `_ +* Storage servers with large amounts of data would pause for a short period of time after rebooting. `(PR #1001) `_ + +Features +-------- + +* Added the ability to specify versions as version-days ago from latest log in backup. 
`(PR #1007) `_ + 6.0.17 ====== From 962ff501c1390162c998297133207b22a505ec77 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Mon, 7 Jan 2019 18:23:05 -0800 Subject: [PATCH 43/55] updated release notes --- documentation/sphinx/source/release-notes.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 6697136f95..7f6fd66b04 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -9,8 +9,8 @@ Fixes ----- * Backup metadata could falsely indicate that the backup is not usable. `(PR #1007) `_ -* Backup expire and delete operations could fail to delete some files. `(PR #1007) `_ -* Restore could fail to apply some range files. `(PR #1007) `_ +* Blobstore request failures could cause backup expire and delete operations to skip some files. `(PR #1007) `_ +* Blobstore request failures could cause restore to fail to apply some files. `(PR #1007) `_ * Storage servers with large amounts of data would pause for a short period of time after rebooting. `(PR #1001) `_ Features From b4a8dea183fd60fb49cab220b4683e7840ebfcc1 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Mon, 7 Jan 2019 18:24:16 -0800 Subject: [PATCH 44/55] fixed a typo --- documentation/sphinx/source/release-notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 7f6fd66b04..23a439b7d4 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -8,7 +8,7 @@ Release Notes Fixes ----- -* Backup metadata could falsely indicate that the backup is not usable. `(PR #1007) `_ +* Backup metadata could falsely indicate that a backup is not usable. `(PR #1007) `_ * Blobstore request failures could cause backup expire and delete operations to skip some files. 
`(PR #1007) `_ * Blobstore request failures could cause restore to fail to apply some files. `(PR #1007) `_ * Storage servers with large amounts of data would pause for a short period of time after rebooting. `(PR #1001) `_ From d2655171560e790a8f01e5db723083c077bfb85f Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Tue, 8 Jan 2019 14:36:01 -0800 Subject: [PATCH 45/55] Fix: fast allocator would not cleanup memory for a thread if that thread never called getMagazine. This could happen if the first thing the thread did was to release memory. Added a new metric for the number of threads that hold memory for each size and improve some existing metrics. Fix: a failed ASSERT would crash if done early in the program lifetime. --- documentation/sphinx/source/release-notes.rst | 1 + fdbserver/storageserver.actor.cpp | 4 +- flow/Error.cpp | 2 +- flow/FastAlloc.cpp | 160 ++++++++++++------ flow/FastAlloc.h | 9 +- flow/Net2.actor.cpp | 4 +- flow/Platform.cpp | 2 +- flow/SystemMonitor.cpp | 4 +- 8 files changed, 122 insertions(+), 64 deletions(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 23a439b7d4..4360912d43 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -12,6 +12,7 @@ Fixes * Blobstore request failures could cause backup expire and delete operations to skip some files. `(PR #1007) `_ * Blobstore request failures could cause restore to fail to apply some files. `(PR #1007) `_ * Storage servers with large amounts of data would pause for a short period of time after rebooting. `(PR #1001) `_ +* The client library could leak memory when a thread died. 
Features -------- diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index ec3f0e8b76..56389d5f22 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -3562,7 +3562,7 @@ void versionedMapTest() { const int NSIZE = sizeof(VersionedMap::PTreeT); const int ASIZE = NSIZE<=64 ? 64 : NextPowerOfTwo::Result; - auto before = FastAllocator< ASIZE >::getMemoryUsed(); + auto before = FastAllocator< ASIZE >::getTotalMemory(); for(int v=1; v<=1000; ++v) { vm.createNewVersion(v); @@ -3576,7 +3576,7 @@ void versionedMapTest() { } } - auto after = FastAllocator< ASIZE >::getMemoryUsed(); + auto after = FastAllocator< ASIZE >::getTotalMemory(); int count = 0; for(auto i = vm.atLatest().begin(); i != vm.atLatest().end(); ++i) diff --git a/flow/Error.cpp b/flow/Error.cpp index 7c0d56b750..cabbcc8a5d 100644 --- a/flow/Error.cpp +++ b/flow/Error.cpp @@ -115,5 +115,5 @@ void ErrorCodeTable::addCode(int code, const char *name, const char *description } bool isAssertDisabled(int line) { - return FLOW_KNOBS->DISABLE_ASSERTS == -1 || FLOW_KNOBS->DISABLE_ASSERTS == line; + return FLOW_KNOBS && (FLOW_KNOBS->DISABLE_ASSERTS == -1 || FLOW_KNOBS->DISABLE_ASSERTS == line); } diff --git a/flow/FastAlloc.cpp b/flow/FastAlloc.cpp index 9a853219a0..dc5987e66f 100644 --- a/flow/FastAlloc.cpp +++ b/flow/FastAlloc.cpp @@ -61,13 +61,16 @@ template INIT_SEG thread_local typename FastAllocator::ThreadData FastAllocator::threadData; +template +thread_local bool FastAllocator::threadInitialized = false; + #ifdef VALGRIND template unsigned long FastAllocator::vLock = 1; #endif template -void* FastAllocator::freelist = 0; +void* FastAllocator::freelist = nullptr; typedef void (*ThreadInitFunction)(); @@ -118,32 +121,30 @@ void recordAllocation( void *ptr, size_t size ) { #error Instrumentation not supported on this platform #endif + uint32_t a = 0, b = 0; if( nptrs > 0 ) { - uint32_t a = 0, b = 0; hashlittle2( buffer, nptrs * sizeof(void *), 
&a, &b ); - - { - double countDelta = std::max(1.0, ((double)SAMPLE_BYTES) / size); - size_t sizeDelta = std::max(SAMPLE_BYTES, size); - ThreadSpinLockHolder holder( memLock ); - auto it = backTraceLookup.find( a ); - if( it == backTraceLookup.end() ) { - auto& bt = backTraceLookup[ a ]; - bt.backTrace = new std::vector(); - for (int j = 0; j < nptrs; j++) { - bt.backTrace->push_back( buffer[j] ); - } - bt.totalSize = sizeDelta; - bt.count = countDelta; - bt.sampleCount = 1; - } else { - it->second.totalSize += sizeDelta; - it->second.count += countDelta; - it->second.sampleCount++; - } - memSample[(int64_t)ptr] = std::make_pair(a, size); - } } + + double countDelta = std::max(1.0, ((double)SAMPLE_BYTES) / size); + size_t sizeDelta = std::max(SAMPLE_BYTES, size); + ThreadSpinLockHolder holder( memLock ); + auto it = backTraceLookup.find( a ); + if( it == backTraceLookup.end() ) { + auto& bt = backTraceLookup[ a ]; + bt.backTrace = new std::vector(); + for (int j = 0; j < nptrs; j++) { + bt.backTrace->push_back( buffer[j] ); + } + bt.totalSize = sizeDelta; + bt.count = countDelta; + bt.sampleCount = 1; + } else { + it->second.totalSize += sizeDelta; + it->second.count += countDelta; + it->second.sampleCount++; + } + memSample[(int64_t)ptr] = std::make_pair(a, size); } memSample_entered = false; #endif @@ -188,20 +189,28 @@ struct FastAllocator::GlobalData { CRITICAL_SECTION mutex; std::vector magazines; // These magazines are always exactly magazine_size ("full") std::vector> partial_magazines; // Magazines that are not "full" and their counts. Only created by releaseThreadMagazines(). 
- long long memoryUsed; - GlobalData() : memoryUsed(0) { + long long totalMemory; + long long partialMagazineUnallocatedMemory; + long long activeThreads; + GlobalData() : totalMemory(0), partialMagazineUnallocatedMemory(0), activeThreads(0) { InitializeCriticalSection(&mutex); } }; template -long long FastAllocator::getMemoryUsed() { - return globalData()->memoryUsed; +long long FastAllocator::getTotalMemory() { + return globalData()->totalMemory; +} + +// This does not include memory held by various threads that's available for allocation +template +long long FastAllocator::getApproximateMemoryUnused() { + return globalData()->magazines.size() * magazine_size * Size + globalData()->partialMagazineUnallocatedMemory; } template -long long FastAllocator::getMemoryUnused() { - return globalData()->magazines.size() * magazine_size * Size; +long long FastAllocator::getActiveThreads() { + return globalData()->activeThreads; } static int64_t getSizeCode(int i) { @@ -221,15 +230,21 @@ static int64_t getSizeCode(int i) { template void *FastAllocator::allocate() { + if(!threadInitialized) { + initThread(); + } + #if FASTALLOC_THREAD_SAFE ThreadData& thr = threadData; if (!thr.freelist) { + ASSERT(thr.count == 0); if (thr.alternate) { thr.freelist = thr.alternate; - thr.alternate = 0; + thr.alternate = nullptr; thr.count = magazine_size; - } else + } else { getMagazine(); + } } --thr.count; void* p = thr.freelist; @@ -237,6 +252,7 @@ void *FastAllocator::allocate() { VALGRIND_MAKE_MEM_DEFINED(p, sizeof(void*)); #endif thr.freelist = *(void**)p; + ASSERT(!thr.freelist == (thr.count == 0)); // freelist is empty if and only if count is 0 //check( p, true ); #else void* p = freelist; @@ -257,15 +273,22 @@ void *FastAllocator::allocate() { template void FastAllocator::release(void *ptr) { + if(!threadInitialized) { + initThread(); + } + #if FASTALLOC_THREAD_SAFE ThreadData& thr = threadData; if (thr.count == magazine_size) { if (thr.alternate) // Two full magazines, return one 
releaseMagazine( thr.alternate ); thr.alternate = thr.freelist; - thr.freelist = 0; + thr.freelist = nullptr; thr.count = 0; } + + ASSERT(!thr.freelist == (thr.count == 0)); // freelist is empty if and only if count is 0 + ++thr.count; *(void**)ptr = thr.freelist; //check(ptr, false); @@ -334,9 +357,27 @@ void FastAllocator::check(void* ptr, bool alloc) { #endif } +template +void FastAllocator::initThread() { + threadInitialized = true; + if (threadInitFunction) { + threadInitFunction(); + } + + EnterCriticalSection(&globalData()->mutex); + ++globalData()->activeThreads; + LeaveCriticalSection(&globalData()->mutex); + + threadData.freelist = nullptr; + threadData.alternate = nullptr; + threadData.count = 0; +} + template void FastAllocator::getMagazine() { - if (threadInitFunction) threadInitFunction(); + ASSERT(threadInitialized); + ASSERT(!threadData.freelist && !threadData.alternate && threadData.count == 0); + EnterCriticalSection(&globalData()->mutex); if (globalData()->magazines.size()) { void* m = globalData()->magazines.back(); @@ -348,12 +389,13 @@ void FastAllocator::getMagazine() { } else if (globalData()->partial_magazines.size()) { std::pair p = globalData()->partial_magazines.back(); globalData()->partial_magazines.pop_back(); + globalData()->partialMagazineUnallocatedMemory -= p.first * Size; LeaveCriticalSection(&globalData()->mutex); threadData.freelist = p.second; threadData.count = p.first; return; } - globalData()->memoryUsed += magazine_size*Size; + globalData()->totalMemory += magazine_size*Size; LeaveCriticalSection(&globalData()->mutex); // Allocate a new page of data from the system allocator @@ -361,7 +403,7 @@ void FastAllocator::getMagazine() { interlockedIncrement(&pageCount); #endif - void** block = 0; + void** block = nullptr; #if FAST_ALLOCATOR_DEBUG #ifdef WIN32 static int alt = 0; alt++; @@ -386,30 +428,42 @@ void FastAllocator::getMagazine() { check( &block[i*PSize], false ); } - block[(magazine_size-1)*PSize+1] = 
block[(magazine_size-1)*PSize] = 0; + block[(magazine_size-1)*PSize+1] = block[(magazine_size-1)*PSize] = nullptr; check( &block[(magazine_size-1)*PSize], false ); threadData.freelist = block; threadData.count = magazine_size; } template void FastAllocator::releaseMagazine(void* mag) { + ASSERT(threadInitialized); EnterCriticalSection(&globalData()->mutex); globalData()->magazines.push_back(mag); LeaveCriticalSection(&globalData()->mutex); } template void FastAllocator::releaseThreadMagazines() { - ThreadData& thr = threadData; + if(threadInitialized) { + threadInitialized = false; + ThreadData& thr = threadData; - if (thr.freelist || thr.alternate) { EnterCriticalSection(&globalData()->mutex); - if (thr.freelist) globalData()->partial_magazines.push_back( std::make_pair(thr.count, thr.freelist) ); - if (thr.alternate) globalData()->magazines.push_back(thr.alternate); + if (thr.freelist || thr.alternate) { + if (thr.freelist) { + ASSERT(thr.count > 0 && thr.count <= magazine_size); + globalData()->partial_magazines.push_back( std::make_pair(thr.count, thr.freelist) ); + globalData()->partialMagazineUnallocatedMemory += thr.count * Size; + } + if (thr.alternate) { + globalData()->magazines.push_back(thr.alternate); + } + } + --globalData()->activeThreads; LeaveCriticalSection(&globalData()->mutex); + + thr.count = 0; + thr.alternate = nullptr; + thr.freelist = nullptr; } - thr.count = 0; - thr.alternate = 0; - thr.freelist = 0; } void releaseAllThreadMagazines() { @@ -427,15 +481,15 @@ void releaseAllThreadMagazines() { int64_t getTotalUnusedAllocatedMemory() { int64_t unusedMemory = 0; - unusedMemory += FastAllocator<16>::getMemoryUnused(); - unusedMemory += FastAllocator<32>::getMemoryUnused(); - unusedMemory += FastAllocator<64>::getMemoryUnused(); - unusedMemory += FastAllocator<128>::getMemoryUnused(); - unusedMemory += FastAllocator<256>::getMemoryUnused(); - unusedMemory += FastAllocator<512>::getMemoryUnused(); - unusedMemory += 
FastAllocator<1024>::getMemoryUnused(); - unusedMemory += FastAllocator<2048>::getMemoryUnused(); - unusedMemory += FastAllocator<4096>::getMemoryUnused(); + unusedMemory += FastAllocator<16>::getApproximateMemoryUnused(); + unusedMemory += FastAllocator<32>::getApproximateMemoryUnused(); + unusedMemory += FastAllocator<64>::getApproximateMemoryUnused(); + unusedMemory += FastAllocator<128>::getApproximateMemoryUnused(); + unusedMemory += FastAllocator<256>::getApproximateMemoryUnused(); + unusedMemory += FastAllocator<512>::getApproximateMemoryUnused(); + unusedMemory += FastAllocator<1024>::getApproximateMemoryUnused(); + unusedMemory += FastAllocator<2048>::getApproximateMemoryUnused(); + unusedMemory += FastAllocator<4096>::getApproximateMemoryUnused(); return unusedMemory; } diff --git a/flow/FastAlloc.h b/flow/FastAlloc.h index cd7afa3a17..b8d8b19a1e 100644 --- a/flow/FastAlloc.h +++ b/flow/FastAlloc.h @@ -106,8 +106,9 @@ public: static void release(void* ptr); static void check( void* ptr, bool alloc ); - static long long getMemoryUsed(); - static long long getMemoryUnused(); + static long long getTotalMemory(); + static long long getApproximateMemoryUnused(); + static long long getActiveThreads(); static void releaseThreadMagazines(); @@ -129,6 +130,7 @@ private: void* alternate; // alternate is either a full magazine, or an empty one }; static thread_local ThreadData threadData; + static thread_local bool threadInitialized; static GlobalData* globalData() { #ifdef VALGRIND ANNOTATE_RWLOCK_ACQUIRED(vLock, 1); @@ -144,7 +146,8 @@ private: static void* freelist; FastAllocator(); // not implemented - static void getMagazine(); // sets threadData.freelist and threadData.count + static void initThread(); + static void getMagazine(); static void releaseMagazine(void*); }; diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp index 0b108ccf38..ae716734a5 100644 --- a/flow/Net2.actor.cpp +++ b/flow/Net2.actor.cpp @@ -1097,7 +1097,7 @@ void net2_test() { Endpoint 
destination; - printf(" Used: %lld\n", FastAllocator<4096>::getMemoryUsed()); + printf(" Used: %lld\n", FastAllocator<4096>::getTotalMemory()); char junk[100]; @@ -1147,6 +1147,6 @@ void net2_test() { printf("SimSend x 1Kx10K: %0.2f sec\n", timer()-before); printf(" Bytes: %d\n", totalBytes); - printf(" Used: %lld\n", FastAllocator<4096>::getMemoryUsed()); + printf(" Used: %lld\n", FastAllocator<4096>::getTotalMemory()); */ }; diff --git a/flow/Platform.cpp b/flow/Platform.cpp index d612eeb174..a754c87477 100644 --- a/flow/Platform.cpp +++ b/flow/Platform.cpp @@ -2232,7 +2232,7 @@ std::string getDefaultPluginPath( const char* plugin_name ) { }; // namespace platform #ifdef ALLOC_INSTRUMENTATION -#define TRACEALLOCATOR( size ) TraceEvent("MemSample").detail("Count", FastAllocator::getMemoryUnused()/size).detail("TotalSize", FastAllocator::getMemoryUnused()).detail("SampleCount", 1).detail("Hash", "FastAllocatedUnused" #size ).detail("Bt", "na") +#define TRACEALLOCATOR( size ) TraceEvent("MemSample").detail("Count", FastAllocator::getApproximateMemoryUnused()/size).detail("TotalSize", FastAllocator::getApproximateMemoryUnused()).detail("SampleCount", 1).detail("Hash", "FastAllocatedUnused" #size ).detail("Bt", "na") #ifdef __linux__ #include #endif diff --git a/flow/SystemMonitor.cpp b/flow/SystemMonitor.cpp index 135da33ac2..2c7c2f5a54 100644 --- a/flow/SystemMonitor.cpp +++ b/flow/SystemMonitor.cpp @@ -41,8 +41,8 @@ void systemMonitor() { customSystemMonitor("ProcessMetrics", &statState, true ); } -#define TRACEALLOCATOR( size ) TraceEvent("MemSample").detail("Count", FastAllocator::getMemoryUnused()/size).detail("TotalSize", FastAllocator::getMemoryUnused()).detail("SampleCount", 1).detail("Hash", "FastAllocatedUnused" #size ).detail("Bt", "na") -#define DETAILALLOCATORMEMUSAGE( size ) detail("AllocatedMemory"#size, FastAllocator::getMemoryUsed()).detail("ApproximateUnusedMemory"#size, FastAllocator::getMemoryUnused()) +#define TRACEALLOCATOR( size ) 
TraceEvent("MemSample").detail("Count", FastAllocator::getApproximateMemoryUnused()/size).detail("TotalSize", FastAllocator::getApproximateMemoryUnused()).detail("SampleCount", 1).detail("Hash", "FastAllocatedUnused" #size ).detail("Bt", "na") +#define DETAILALLOCATORMEMUSAGE( size ) detail("TotalMemory"#size, FastAllocator::getTotalMemory()).detail("ApproximateUnusedMemory"#size, FastAllocator::getApproximateMemoryUnused()).detail("ActiveThreads"#size, FastAllocator::getActiveThreads()) SystemStatistics customSystemMonitor(std::string eventName, StatisticsState *statState, bool machineMetrics) { SystemStatistics currentStats = getSystemStatistics(machineState.folder.present() ? machineState.folder.get() : "", From b95d37def21d7b506794451d8f5cd75d37368795 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Tue, 8 Jan 2019 14:37:23 -0800 Subject: [PATCH 46/55] Add PR to release notes. --- documentation/sphinx/source/release-notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/sphinx/source/release-notes.rst b/documentation/sphinx/source/release-notes.rst index 4360912d43..03a770fdc5 100644 --- a/documentation/sphinx/source/release-notes.rst +++ b/documentation/sphinx/source/release-notes.rst @@ -12,7 +12,7 @@ Fixes * Blobstore request failures could cause backup expire and delete operations to skip some files. `(PR #1007) `_ * Blobstore request failures could cause restore to fail to apply some files. `(PR #1007) `_ * Storage servers with large amounts of data would pause for a short period of time after rebooting. `(PR #1001) `_ -* The client library could leak memory when a thread died. +* The client library could leak memory when a thread died. `(PR #1011) `_ Features -------- From d005594bdd71653824fc6bd92901932670aaca7c Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Tue, 8 Jan 2019 14:48:47 -0800 Subject: [PATCH 47/55] Added optional support for sending a unique id per request in a header for logging/tracking purposes. 
--- fdbclient/HTTP.actor.cpp | 74 ++++++++++++++++++++++++++++++++-------- fdbclient/HTTP.h | 2 +- fdbclient/Knobs.cpp | 1 + fdbclient/Knobs.h | 1 + 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/fdbclient/HTTP.actor.cpp b/fdbclient/HTTP.actor.cpp index 498dfc3e1e..b8e497acf0 100644 --- a/fdbclient/HTTP.actor.cpp +++ b/fdbclient/HTTP.actor.cpp @@ -293,13 +293,19 @@ namespace HTTP { // Request content is provided as UnsentPacketQueue *pContent which will be depleted as bytes are sent but the queue itself must live for the life of this actor // and be destroyed by the caller // TODO: pSent is very hackish, do something better. - ACTOR Future> doRequest(Reference conn, std::string verb, std::string resource, HTTP::Headers headers, UnsentPacketQueue *pContent, int contentLen, Reference sendRate, int64_t *pSent, Reference recvRate) { + ACTOR Future> doRequest(Reference conn, std::string verb, std::string resource, HTTP::Headers headers, UnsentPacketQueue *pContent, int contentLen, Reference sendRate, int64_t *pSent, Reference recvRate, std::string requestIDHeader) { state TraceEvent event(SevDebug, "HTTPRequest"); state UnsentPacketQueue empty; if(pContent == NULL) pContent = ∅ + // There is no standard http request id header field, so either a global default can be set via a knob + // or it can be set per-request with the requestIDHeader argument (which overrides the default) + if(requestIDHeader.empty()) { + requestIDHeader = CLIENT_KNOBS->HTTP_REQUEST_ID_HEADER; + } + state bool earlyResponse = false; state int total_sent = 0; @@ -310,9 +316,17 @@ namespace HTTP { event.detail("RequestContentLen", contentLen); try { - state std::string requestID = g_random->randomUniqueID().toString(); - event.detail("RequestIDSent", requestID); - headers["X-Request-ID"] = requestID; + state std::string requestID; + if(!requestIDHeader.empty()) { + requestID = g_random->randomUniqueID().toString(); + requestID = requestID.insert(20, "-"); + requestID = 
requestID.insert(16, "-"); + requestID = requestID.insert(12, "-"); + requestID = requestID.insert(8, "-"); + + headers[requestIDHeader] = requestID; + event.detail("RequestIDSent", requestID); + } // Write headers to a packet buffer chain PacketBuffer *pFirst = new PacketBuffer(); @@ -359,28 +373,58 @@ namespace HTTP { } Void _ = wait(responseReading); + double elapsed = timer() - send_start; + event.detail("ResponseCode", r->code); event.detail("ResponseContentLen", r->contentLen); + event.detail("Elapsed", elapsed); - auto iid = r->headers.find("X-Request-ID"); - if(iid != r->headers.end() && iid->second != requestID) { - event.detail("RequestIDReceived", iid->second); - throw http_bad_request_id(); + Optional err; + if(!requestIDHeader.empty()) { + std::string responseID; + auto iid = r->headers.find(requestIDHeader); + if(iid != r->headers.end()) { + responseID = iid->second; + } + event.detail("RequestIDReceived", responseID); + if(requestID != responseID) { + err = http_bad_request_id(); + // Log a non-debug a error + TraceEvent(SevError, "HTTPRequestFailedIDMismatch") + .detail("DebugID", conn->getDebugID()) + .detail("RemoteAddress", conn->getPeerAddress()) + .detail("Verb", verb) + .detail("Resource", resource) + .detail("RequestContentLen", contentLen) + .detail("ResponseCode", r->code) + .detail("ResponseContentLen", r->contentLen) + .detail("RequestIDSent", requestID) + .detail("RequestIDReceived", responseID) + .error(err.get()); + } } - double elapsed = timer() - send_start; - event.detail("Elapsed", elapsed); - if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0) - printf("[%s] HTTP code=%d early=%d, time=%fs %s %s contentLen=%d [%d out, response content len %d]\n", - conn->getDebugID().toString().c_str(), r->code, earlyResponse, elapsed, verb.c_str(), resource.c_str(), contentLen, total_sent, (int)r->contentLen); - if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 2) + if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0) { + printf("[%s] HTTP %scode=%d early=%d, time=%fs %s %s 
contentLen=%d [%d out, response content len %d]\n", + conn->getDebugID().toString().c_str(), + (err.present() ? format("*ERROR*=%s ", err.get().name()).c_str() : ""), + r->code, earlyResponse, elapsed, verb.c_str(), resource.c_str(), contentLen, total_sent, (int)r->contentLen); + } + if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 2) { printf("[%s] HTTP RESPONSE: %s %s\n%s\n", conn->getDebugID().toString().c_str(), verb.c_str(), resource.c_str(), r->toString().c_str()); + } + + if(err.present()) { + throw err.get(); + } + return r; } catch(Error &e) { double elapsed = timer() - send_start; - if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0) + if(CLIENT_KNOBS->HTTP_VERBOSE_LEVEL > 0 && e.code() != error_code_http_bad_request_id) { printf("[%s] HTTP *ERROR*=%s early=%d, time=%fs %s %s contentLen=%d [%d out]\n", conn->getDebugID().toString().c_str(), e.name(), earlyResponse, elapsed, verb.c_str(), resource.c_str(), contentLen, total_sent); + } event.error(e); throw; } diff --git a/fdbclient/HTTP.h b/fdbclient/HTTP.h index 7d481a5eb1..476f6240d3 100644 --- a/fdbclient/HTTP.h +++ b/fdbclient/HTTP.h @@ -51,5 +51,5 @@ namespace HTTP { PacketBuffer * writeRequestHeader(std::string const &verb, std::string const &resource, HTTP::Headers const &headers, PacketBuffer *dest); // Do an HTTP request to the blob store, parse the response. 
- Future> doRequest(Reference const &conn, std::string const &verb, std::string const &resource, HTTP::Headers const &headers, UnsentPacketQueue * const &pContent, int const &contentLen, Reference const &sendRate, int64_t * const &pSent, Reference const &recvRate); + Future> doRequest(Reference const &conn, std::string const &verb, std::string const &resource, HTTP::Headers const &headers, UnsentPacketQueue * const &pContent, int const &contentLen, Reference const &sendRate, int64_t * const &pSent, Reference const &recvRate, const std::string &requestHeader = std::string()); } diff --git a/fdbclient/Knobs.cpp b/fdbclient/Knobs.cpp index a39316cfd6..d68e445d2c 100644 --- a/fdbclient/Knobs.cpp +++ b/fdbclient/Knobs.cpp @@ -148,6 +148,7 @@ ClientKnobs::ClientKnobs(bool randomize) { init( HTTP_READ_SIZE, 128*1024 ); init( HTTP_SEND_SIZE, 32*1024 ); init( HTTP_VERBOSE_LEVEL, 0 ); + init( HTTP_REQUEST_ID_HEADER, "" ); init( BLOBSTORE_CONNECT_TRIES, 10 ); init( BLOBSTORE_CONNECT_TIMEOUT, 10 ); init( BLOBSTORE_MAX_CONNECTION_LIFE, 120 ); diff --git a/fdbclient/Knobs.h b/fdbclient/Knobs.h index 6bd3875641..19067a0b27 100644 --- a/fdbclient/Knobs.h +++ b/fdbclient/Knobs.h @@ -152,6 +152,7 @@ public: int HTTP_SEND_SIZE; int HTTP_READ_SIZE; int HTTP_VERBOSE_LEVEL; + std::string HTTP_REQUEST_ID_HEADER; int BLOBSTORE_CONNECT_TRIES; int BLOBSTORE_CONNECT_TIMEOUT; int BLOBSTORE_MAX_CONNECTION_LIFE; From 295136900694e7328a446a05e4a2e94867f99f16 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Tue, 8 Jan 2019 16:28:40 -0800 Subject: [PATCH 48/55] When starting a restore, if range files are missing their names will be logged before an error is thrown. The error thrown is now restore_missing_data instead of restore_corrupted_data. 
--- fdbclient/BackupContainer.actor.cpp | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index dc7dc1734e..51345e7dea 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -361,14 +361,34 @@ public: throw restore_corrupted_data(); std::vector results; + int missing = 0; + for(auto const &fileValue : filesArray.get_array()) { if(fileValue.type() != json_spirit::str_type) throw restore_corrupted_data(); - auto i = rangeIndex.find(fileValue.get_str()); - if(i == rangeIndex.end()) - throw restore_corrupted_data(); - results.push_back(i->second); + // If the file is not in the index then log the error but don't throw yet, keep checking the whole list. + auto i = rangeIndex.find(fileValue.get_str()); + if(i == rangeIndex.end()) { + TraceEvent(SevError, "FileRestoreMissingRangeFile") + .detail("URL", bc->getURL()) + .detail("File", fileValue.get_str()); + + ++missing; + } + + // No point in using more memory once data is missing since an error will be thrown instead. + if(missing == 0) { + results.push_back(i->second); + } + } + + if(missing > 0) { + TraceEvent(SevError, "FileRestoreMissingRangeFileSummary") + .detail("URL", bc->getURL()) + .detail("Count", missing); + + throw restore_missing_data(); } return results; From bb2de3479baa74b5aeb9b7a5d8737657a7edd20c Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Tue, 8 Jan 2019 16:29:00 -0800 Subject: [PATCH 49/55] Updated unit test to new backup container behavior. 
--- fdbclient/BackupContainer.actor.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fdbclient/BackupContainer.actor.cpp b/fdbclient/BackupContainer.actor.cpp index 51345e7dea..f85e6c80d2 100644 --- a/fdbclient/BackupContainer.actor.cpp +++ b/fdbclient/BackupContainer.actor.cpp @@ -1693,7 +1693,7 @@ ACTOR Future testBackupContainer(std::string url) { try { Void _ = wait(c->deleteContainer()); } catch(Error &e) { - if(e.code() != error_code_backup_invalid_url) + if(e.code() != error_code_backup_invalid_url && e.code() != error_code_backup_does_not_exist) throw; } @@ -1798,10 +1798,9 @@ ACTOR Future testBackupContainer(std::string url) { printf("DELETING\n"); Void _ = wait(c->deleteContainer()); - BackupDescription d = wait(c->describeBackup()); - printf("\n%s\n", d.toString().c_str()); - ASSERT(d.snapshots.size() == 0); - ASSERT(!d.minLogBegin.present()); + state Future d = c->describeBackup(); + Void _ = wait(ready(d)); + ASSERT(d.isError() && d.getError().code() == error_code_backup_does_not_exist); BackupFileList empty = wait(c->dumpFileList()); ASSERT(empty.ranges.size() == 0); From 604ad062d5398047359c2d3a669512de52b0671c Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Tue, 8 Jan 2019 18:12:15 -0800 Subject: [PATCH 50/55] Updated backup correctness test to new behavior. WaitBackup() can now return the UID and BackupContainer atomically with the status code for a backup tag. --- fdbclient/BackupAgent.h | 2 +- fdbclient/FileBackupAgent.actor.cpp | 28 +++++++++----- .../workloads/BackupCorrectness.actor.cpp | 37 ++++++++++--------- 3 files changed, 39 insertions(+), 28 deletions(-) diff --git a/fdbclient/BackupAgent.h b/fdbclient/BackupAgent.h index caff559b2e..e0dbe219f3 100644 --- a/fdbclient/BackupAgent.h +++ b/fdbclient/BackupAgent.h @@ -276,7 +276,7 @@ public: // stopWhenDone will return when the backup is stopped, if enabled. Otherwise, it // will return when the backup directory is restorable. 
- Future waitBackup(Database cx, std::string tagName, bool stopWhenDone = true); + Future waitBackup(Database cx, std::string tagName, bool stopWhenDone = true, Reference *pContainer = nullptr, UID *pUID = nullptr); static const Key keyLastRestorable; diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index e5fd4a27a3..a0aff5b638 100755 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -3379,8 +3379,9 @@ class FileBackupAgentImpl { public: static const int MAX_RESTORABLE_FILE_METASECTION_BYTES = 1024 * 8; - // This method will return the final status of the backup - ACTOR static Future waitBackup(FileBackupAgent* backupAgent, Database cx, std::string tagName, bool stopWhenDone) { + // This method will return the final status of the backup at tag, and return the URL that was used on the tag + // when that status value was read. + ACTOR static Future waitBackup(FileBackupAgent* backupAgent, Database cx, std::string tagName, bool stopWhenDone, Reference *pContainer = nullptr, UID *pUID = nullptr) { state std::string backTrace; state KeyBackedTag tag = makeBackupTag(tagName); @@ -3398,13 +3399,20 @@ public: state BackupConfig config(oldUidAndAborted.get().first); state EBackupState status = wait(config.stateEnum().getD(tr, false, EBackupState::STATE_NEVERRAN)); - // Break, if no longer runnable - if (!FileBackupAgent::isRunnable(status)) { - return status; - } + // Break, if one of the following is true + // - no longer runnable + // - in differential mode (restorable) and stopWhenDone is not enabled + if( !FileBackupAgent::isRunnable(status) || (!stopWhenDone) && (BackupAgentBase::STATE_DIFFERENTIAL == status) ) { + + if(pContainer != nullptr) { + Reference c = wait(config.backupContainer().getOrThrow(tr, false, backup_invalid_info())); + *pContainer = c; + } + + if(pUID != nullptr) { + *pUID = oldUidAndAborted.get().first; + } - // Break, if in differential mode (restorable) and stopWhenDone is 
not enabled - if ((!stopWhenDone) && (BackupAgentBase::STATE_DIFFERENTIAL == status)) { return status; } @@ -4080,7 +4088,7 @@ void FileBackupAgent::setLastRestorable(Reference tr, tr->set(lastRestorable.pack(tagName), BinaryWriter::toValue(version, Unversioned())); } -Future FileBackupAgent::waitBackup(Database cx, std::string tagName, bool stopWhenDone) { - return FileBackupAgentImpl::waitBackup(this, cx, tagName, stopWhenDone); +Future FileBackupAgent::waitBackup(Database cx, std::string tagName, bool stopWhenDone, Reference *pContainer, UID *pUID) { + return FileBackupAgentImpl::waitBackup(this, cx, tagName, stopWhenDone, pContainer, pUID); } diff --git a/fdbserver/workloads/BackupCorrectness.actor.cpp b/fdbserver/workloads/BackupCorrectness.actor.cpp index 834c1db5af..216737b38e 100644 --- a/fdbserver/workloads/BackupCorrectness.actor.cpp +++ b/fdbserver/workloads/BackupCorrectness.actor.cpp @@ -188,40 +188,43 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload { if (BUGGIFY) { state KeyBackedTag backupTag = makeBackupTag(tag.toString()); TraceEvent("BARW_DoBackupWaitForRestorable", randomID).detail("Tag", backupTag.tagName); - // Wait until the backup is in a restorable state - state int resultWait = wait(backupAgent->waitBackup(cx, backupTag.tagName, false)); - UidAndAbortedFlagT uidFlag = wait(backupTag.getOrThrow(cx)); - state UID logUid = uidFlag.first; - state Reference lastBackupContainer = wait(BackupConfig(logUid).backupContainer().getD(cx)); + + // Wait until the backup is in a restorable state and get the status, URL, and UID atomically + state Reference lastBackupContainer; + state UID lastBackupUID; + state int resultWait = wait(backupAgent->waitBackup(cx, backupTag.tagName, false, &lastBackupContainer, &lastBackupUID)); state bool restorable = false; if(lastBackupContainer) { - state BackupDescription desc = wait(lastBackupContainer->describeBackup()); - Void _ = wait(desc.resolveVersionTimes(cx)); - printf("BackupDescription:\n%s\n", 
desc.toString().c_str()); - restorable = desc.maxRestorableVersion.present(); + state Future fdesc = lastBackupContainer->describeBackup(); + Void _ = wait(ready(fdesc)); + + if(!fdesc.isError()) { + state BackupDescription desc = fdesc.get(); + Void _ = wait(desc.resolveVersionTimes(cx)); + printf("BackupDescription:\n%s\n", desc.toString().c_str()); + restorable = desc.maxRestorableVersion.present(); + } } TraceEvent("BARW_LastBackupContainer", randomID) .detail("BackupTag", printable(tag)) .detail("LastBackupContainer", lastBackupContainer ? lastBackupContainer->getURL() : "") - .detail("LogUid", logUid).detail("WaitStatus", resultWait).detail("Restorable", restorable); + .detail("LastBackupUID", lastBackupUID).detail("WaitStatus", resultWait).detail("Restorable", restorable); // Do not check the backup, if aborted if (resultWait == BackupAgentBase::STATE_ABORTED) { } // Ensure that a backup container was found else if (!lastBackupContainer) { - TraceEvent("BARW_MissingBackupContainer", randomID).detail("LogUid", logUid).detail("BackupTag", printable(tag)).detail("WaitStatus", resultWait); + TraceEvent(SevError, "BARW_MissingBackupContainer", randomID).detail("LastBackupUID", lastBackupUID).detail("BackupTag", printable(tag)).detail("WaitStatus", resultWait); printf("BackupCorrectnessMissingBackupContainer tag: %s status: %d\n", printable(tag).c_str(), resultWait); } // Check that backup is restorable - else { - if(!restorable) { - TraceEvent("BARW_NotRestorable", randomID).detail("LogUid", logUid).detail("BackupTag", printable(tag)) - .detail("BackupFolder", lastBackupContainer->getURL()).detail("WaitStatus", resultWait); - printf("BackupCorrectnessNotRestorable: tag: %s\n", printable(tag).c_str()); - } + else if(!restorable) { + TraceEvent(SevError, "BARW_NotRestorable", randomID).detail("LastBackupUID", lastBackupUID).detail("BackupTag", printable(tag)) + .detail("BackupFolder", lastBackupContainer->getURL()).detail("WaitStatus", resultWait); + 
printf("BackupCorrectnessNotRestorable: tag: %s\n", printable(tag).c_str()); } // Abort the backup, if not the first backup because the second backup may have aborted the backup by now From e7f800b01983b01741c46aeea271676e3be2ba81 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Tue, 8 Jan 2019 19:00:58 -0800 Subject: [PATCH 51/55] Restore start with dry run option no longer requires or uses a database. Removed duplicate and unnecessary code. Dry run execution is more clear, it does not pretend to have actually restored data. Fixed typo in help text. --- fdbbackup/backup.actor.cpp | 72 +++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 40 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 7b5da88cc6..cb602bc960 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -793,7 +793,7 @@ static void printBackupUsage(bool devhelp) { printf(" -e ERRORLIMIT The maximum number of errors printed by status (default is 10).\n"); printf(" -k KEYS List of key ranges to backup.\n" " If not specified, the entire database will be backed up.\n"); - printf(" -n, --dry-run For start or restore operations, performs a trial run with no actual changes made.\n"); + printf(" -n, --dryrun For start or restore operations, performs a trial run with no actual changes made.\n"); printf(" -v, --version Print version information and exit.\n"); printf(" -w, --wait Wait for the backup to complete (allowed with `start' and `discontinue').\n"); printf(" -z, --no-stop-when-done\n" @@ -837,7 +837,7 @@ static void printRestoreUsage(bool devhelp ) { printf(" -k KEYS List of key ranges from the backup to restore\n"); printf(" --remove_prefix PREFIX prefix to remove from the restored keys\n"); printf(" --add_prefix PREFIX prefix to add to the restored keys\n"); - printf(" -n, --dry-run Perform a trial run with no changes made.\n"); + printf(" -n, --dryrun Perform a trial run with no changes made.\n"); printf(" -v DBVERSION 
The version at which the database will be restored.\n"); printf(" -h, --help Display this help and exit.\n"); @@ -1787,11 +1787,10 @@ ACTOR Future changeDBBackupResumed(Database src, Database dest, bool pause return Void(); } -ACTOR Future runRestore(Database db, std::string tagName, std::string container, Standalone> ranges, Version dbVersion, bool performRestore, bool verbose, bool waitForDone, std::string addPrefix, std::string removePrefix) { +ACTOR Future runRestore(Database db, std::string tagName, std::string container, Standalone> ranges, Version targetVersion, bool performRestore, bool verbose, bool waitForDone, std::string addPrefix, std::string removePrefix) { try { state FileBackupAgent backupAgent; - state int64_t restoreVersion = -1; if(ranges.size() > 1) { fprintf(stderr, "Currently only a single restore range is supported!\n"); @@ -1800,52 +1799,45 @@ ACTOR Future runRestore(Database db, std::string tagName, std::string cont state KeyRange range = (ranges.size() == 0) ? normalKeys : ranges.front(); - if (performRestore) { - if(dbVersion == invalidVersion) { - BackupDescription desc = wait(IBackupContainer::openContainer(container)->describeBackup()); - if(!desc.maxRestorableVersion.present()) { - fprintf(stderr, "The specified backup is not restorable to any version.\n"); - throw restore_error(); - } + state Reference bc = IBackupContainer::openContainer(container); - dbVersion = desc.maxRestorableVersion.get(); + // If targetVersion is unset then use the maximum restorable version from the backup description + if(targetVersion == invalidVersion) { + if(verbose) + printf("No restore target version given, will use maximum restorable version from backup description.\n"); + + BackupDescription desc = wait(bc->describeBackup()); + + if(!desc.maxRestorableVersion.present()) { + fprintf(stderr, "The specified backup is not restorable to any version.\n"); + throw restore_error(); + } + + targetVersion = desc.maxRestorableVersion.get(); + + if(verbose) + 
printf("Using target restore version %lld\n", targetVersion); + } + + if (performRestore) { + Version restoredVersion = wait(backupAgent.restore(db, KeyRef(tagName), KeyRef(container), waitForDone, targetVersion, verbose, range, KeyRef(addPrefix), KeyRef(removePrefix))); + + if(waitForDone && verbose) { + // If restore is now complete then report version restored + printf("Restored to version %lld\n", restoredVersion); } - Version _restoreVersion = wait(backupAgent.restore(db, KeyRef(tagName), KeyRef(container), waitForDone, dbVersion, verbose, range, KeyRef(addPrefix), KeyRef(removePrefix))); - restoreVersion = _restoreVersion; } else { - state Reference bc = IBackupContainer::openContainer(container); - state BackupDescription description = wait(bc->describeBackup()); + state Optional rset = wait(bc->getRestoreSet(targetVersion)); - if(dbVersion <= 0) { - Void _ = wait(description.resolveVersionTimes(db)); - if(description.maxRestorableVersion.present()) - restoreVersion = description.maxRestorableVersion.get(); - else { - fprintf(stderr, "Backup is not restorable\n"); - throw restore_invalid_version(); - } - } - else - restoreVersion = dbVersion; - - state Optional rset = wait(bc->getRestoreSet(restoreVersion)); if(!rset.present()) { - fprintf(stderr, "Insufficient data to restore to version %lld\n", restoreVersion); + fprintf(stderr, "Insufficient data to restore to version %lld. Describe backup for more information.\n", targetVersion); throw restore_invalid_version(); } - // Display the restore information, if requested - if (verbose) { - printf("[DRY RUN] Restoring backup to version: %lld\n", (long long) restoreVersion); - printf("%s\n", description.toString().c_str()); - } + printf("Backup can be used to restore to version %lld\n", targetVersion); } - if(waitForDone && verbose) { - // If restore completed then report version restored - printf("Restored to version %lld%s\n", (long long) restoreVersion, (performRestore) ? 
"" : " (DRY RUN)"); - } } catch (Error& e) { if(e.code() == error_code_actor_cancelled) @@ -3036,7 +3028,7 @@ int main(int argc, char* argv[]) { break; case EXE_RESTORE: - if(!initCluster()) + if(!dryRun && !initCluster()) return FDB_EXIT_ERROR; switch(restoreType) { case RESTORE_START: From f116678f0dc01c9d3c416d5caf1b752adfa65e14 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Wed, 9 Jan 2019 03:54:13 -0800 Subject: [PATCH 52/55] Bug fix, restore start dry run would not open a trace file. --- fdbbackup/backup.actor.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index cb602bc960..7bef1a33a5 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -3028,8 +3028,13 @@ int main(int argc, char* argv[]) { break; case EXE_RESTORE: - if(!dryRun && !initCluster()) + if(dryRun) { + initTraceFile(); + } + else if(!initCluster()) { return FDB_EXIT_ERROR; + } + switch(restoreType) { case RESTORE_START: f = stopAfter( runRestore(db, tagName, restoreContainer, backupKeys, dbVersion, !dryRun, !quietDisplay, waitForDone, addPrefix, removePrefix) ); From c23c8fba3e0c185e3d9f44c4e697cfa853fe9c8f Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Wed, 9 Jan 2019 14:27:56 -0800 Subject: [PATCH 53/55] update installer WIX GUID following release --- packaging/msi/FDBInstaller.wxs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/msi/FDBInstaller.wxs b/packaging/msi/FDBInstaller.wxs index d8278c2e91..0053c34ca8 100644 --- a/packaging/msi/FDBInstaller.wxs +++ b/packaging/msi/FDBInstaller.wxs @@ -32,7 +32,7 @@ Date: Wed, 9 Jan 2019 15:31:59 -0800 Subject: [PATCH 54/55] update versions target to 6.0.19 --- versions.target | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.target b/versions.target index 195bb478a2..2eeecd81d5 100644 --- a/versions.target +++ b/versions.target @@ -1,7 +1,7 @@ - 6.0.18 + 6.0.19 6.0 From 
36cf8addd552dd37b73e21630c64320b6441ab4e Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Wed, 9 Jan 2019 15:32:00 -0800 Subject: [PATCH 55/55] update installer WIX GUID following release --- packaging/msi/FDBInstaller.wxs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/msi/FDBInstaller.wxs b/packaging/msi/FDBInstaller.wxs index 0053c34ca8..769d8abb1e 100644 --- a/packaging/msi/FDBInstaller.wxs +++ b/packaging/msi/FDBInstaller.wxs @@ -32,7 +32,7 @@