IBackupContainer has been rewritten to be a logical interface for storing, reading, deleting, expiring, and querying backup data. The details of how the data is organized or stored are now hidden from users of the interface. Both the local and blob store containers have been rewritten; the key changes are a multi-level directory structure and the elimination of temporary files and pseudo-symlinks in the blob store implementation. This refactor has a large impact radius: the previous backup container was just a thin wrapper that presented a single-level list of files and offered no methods for managing or interpreting the file structure, so all of that logic was spread across other parts of the code base. That made moving to the new blob store schema very messy, and without this refactor further changes would only get worse.

Several backup tasks have been cleaned up and simplified because they no longer need to manage the ‘raw’ structure of the backup. The addition of IBackupFile and its finish() method simplified the log and range writer tasks. BlobStoreEndpoint was updated to support bucket creation (now required) and the bucket-listing prefix/delimiter options needed to find common prefixes. Added the KeyBackedSet<T> type. Moved JSONDoc to its own header. Added platform::findFilesRecursively().
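
As a rough sketch of what a writer task now does through the new interface (illustrative only; the helper name, block size, and lack of error handling are not the actual task code):

// Sketch: write one range file and publish it in a keyspace snapshot.
ACTOR Future<Void> writeOneRangeFile(std::string url, Version v, Standalone<StringRef> blockData) {
	state Reference<IBackupContainer> bc = IBackupContainer::openContainer(url);
	Void _ = wait(bc->create());                          // create the container
	state Reference<IBackupFile> f = wait(bc->writeRangeFile(v, 1 << 20));
	Void _ = wait(f->append(blockData));                  // at most one append outstanding at a time
	Void _ = wait(f->finish());                           // file becomes durable/visible in the container
	std::vector<std::string> rangeFiles;
	rangeFiles.push_back(f->getFileName());
	Void _ = wait(bc->writeKeyspaceSnapshotFile(rangeFiles, f->size()));
	return Void();
}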

Still to do: update the command line tool to use the new IBackupContainer interface, and fix bugs in Restore startup.
Stephen Atherton 2017-11-14 23:33:17 -08:00
parent 45fa3680fa
commit 3dfaf13b67
17 changed files with 1788 additions and 1741 deletions

View File

@ -30,6 +30,7 @@
#include "KeyBackedTypes.h"
#include <ctime>
#include <climits>
#include "BackupContainer.h"
class BackupAgentBase : NonCopyable {
public:
@ -262,6 +263,7 @@ public:
Future<std::string> getStatus(Database cx, int errorLimit, std::string tagName);
Future<Version> getLastRestorable(Reference<ReadYourWritesTransaction> tr, Key tagName);
void setLastRestorable(Reference<ReadYourWritesTransaction> tr, Key tagName, Version version);
// stopWhenDone will return when the backup is stopped, if enabled. Otherwise, it
// will return when the backup directory is restorable.
@ -269,14 +271,6 @@ public:
static Future<std::string> getBackupInfo(std::string backupContainer, Version* defaultVersion = NULL);
static std::string getTempFilename();
// Data(key ranges) and Log files will have their file size in the name because it is not at all convenient
// to fetch filesizes from either of the current BackupContainer implementations. LocalDirectory requires
// querying each file separately, and Blob Store doesn't support renames so the apparent log and data files
// are actually a kind of symbolic link so to get the size of the final file it would have to be read.
static std::string getDataFilename(Version version, int64_t size, int blockSize);
static std::string getLogFilename(Version beginVer, Version endVer, int64_t size, int blockSize);
Future<int64_t> getTaskCount(Reference<ReadYourWritesTransaction> tr) { return taskBucket->getTaskCount(tr); }
Future<int64_t> getTaskCount(Database cx) { return taskBucket->getTaskCount(cx); }
Future<Void> watchTaskCount(Reference<ReadYourWritesTransaction> tr) { return taskBucket->watchTaskCount(tr); }
@ -562,14 +556,39 @@ protected:
Subspace configSpace;
};
template<> inline Tuple Codec<Reference<IBackupContainer>>::pack(Reference<IBackupContainer> const &bc) {
return Tuple().append(StringRef(bc->getURL()));
}
template<> inline Reference<IBackupContainer> Codec<Reference<IBackupContainer>>::unpack(Tuple const &val) {
return IBackupContainer::openContainer(val.getString(0).toString());
}
class BackupConfig : public KeyBackedConfig {
public:
BackupConfig(UID uid = UID()) : KeyBackedConfig(fileBackupPrefixRange.begin, uid) {}
BackupConfig(Reference<Task> task) : KeyBackedConfig(fileBackupPrefixRange.begin, task) {}
// rangeFileMap maps a keyrange file's End to its Begin and Filename
typedef std::pair<Key, Key> KeyAndFilenameT;
typedef KeyBackedMap<Key, KeyAndFilenameT> RangeFileMapT;
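// A range file is now described by a RangeSlice (its Begin key, version, file name, and size);
// rangeFileMap maps each range's End key to its RangeSlice.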
struct RangeSlice {
Key begin;
Version version;
std::string fileName;
int64_t fileSize;
Tuple pack() const {
return Tuple().append(begin).append(version).append(StringRef(fileName)).append(fileSize);
}
static RangeSlice unpack(Tuple const &t) {
RangeSlice r;
int i = 0;
r.begin = t.getString(i++);
r.version = t.getInt(i++);
r.fileName = t.getString(i++).toString();
r.fileSize = t.getInt(i++);
return r;
}
};
typedef KeyBackedMap<Key, RangeSlice> RangeFileMapT;
RangeFileMapT rangeFileMap() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
@ -586,10 +605,15 @@ public:
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
KeyBackedProperty<std::string> backupContainer() {
KeyBackedProperty<Reference<IBackupContainer>> backupContainer() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
// Get the backup container URL only without creating a backup container instance.
KeyBackedProperty<Value> backupContainerURL() {
return configSpace.pack(LiteralStringRef("backupContainer"));
}
// Stop differential logging if already started or don't start after completing KV ranges
KeyBackedProperty<bool> stopWhenDone() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));

View File

@ -364,6 +364,7 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RangeResultWithVersi
begin = firstGreaterThan(values.end()[-1].key);
if (!values.more && !limits.isReached()) {
if(terminator)
results.sendError(end_of_stream());
return Void();
}
@ -456,6 +457,7 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RCGroup> results, Fu
results.send(rcGroup);
}
if(terminator)
results.sendError(end_of_stream());
return Void();
}

File diff suppressed because it is too large

View File

@ -23,71 +23,149 @@
#include "flow/flow.h"
#include "fdbrpc/IAsyncFile.h"
#include "fdbrpc/BlobStore.h"
#include "FDBTypes.h"
#include <vector>
// Class representing a container for backup files, such as a mounted directory or a remote filesystem.
// Append-only file interface for writing backup data
// TODO: Move the log file and range file format encoder/decoder classes to this file, probably as part of IBackupFile.
class IBackupFile {
public:
IBackupFile(std::string fileName) : m_fileName(fileName), m_offset(0) {}
virtual ~IBackupFile() {}
// Backup files are append-only and cannot have more than 1 append outstanding at once.
virtual Future<Void> append(StringRef data) = 0;
virtual Future<Void> finish() = 0;
inline std::string getFileName() const {
return m_fileName;
}
inline int64_t size() const {
return m_offset;
}
virtual void addref() = 0;
virtual void delref() = 0;
protected:
std::string m_fileName;
int64_t m_offset;
};
// Structures for various backup components
struct LogFile {
Version beginVersion;
Version endVersion;
uint32_t blockSize;
std::string fileName;
int64_t fileSize;
// Order by beginVersion, break ties with endVersion
bool operator< (const LogFile &rhs) const {
return beginVersion == rhs.beginVersion ? endVersion < rhs.endVersion : beginVersion < rhs.beginVersion;
}
};
struct RangeFile {
Version version;
uint32_t blockSize;
std::string fileName;
int64_t fileSize;
// Order by version, break ties with name
bool operator< (const RangeFile &rhs) const {
return version == rhs.version ? fileName < rhs.fileName : version < rhs.version;
}
};
struct KeyspaceSnapshotFile {
Version beginVersion;
Version endVersion;
std::string fileName;
int64_t totalSize;
// Order by beginVersion, break ties with endVersion
bool operator< (const KeyspaceSnapshotFile &rhs) const {
return beginVersion == rhs.beginVersion ? endVersion < rhs.endVersion : beginVersion < rhs.beginVersion;
}
};
struct BackupDescription {
std::string url;
std::vector<KeyspaceSnapshotFile> snapshots;
Optional<Version> minLogBegin;
Optional<Version> maxLogEnd;
Optional<Version> contiguousLogEnd;
Optional<Version> maxRestorableVersion;
Optional<Version> minRestorableVersion;
std::string extendedDetail; // Freeform container-specific info.
std::string toString() const;
};
struct RestorableFileSet {
Version targetVersion;
std::vector<LogFile> logs;
std::vector<RangeFile> ranges;
};
/* IBackupContainer is an interface to a set of backup data, which contains
* - backup metadata
* - log files
* - range files
* - keyspace snapshot files defining a complete non overlapping key space snapshot
*
* Files in a container are identified by a name. This can be any string, whatever
* makes sense for the underlying storage system.
*
* Reading files is done by file name. File names are discovered by getting a RestorableFileSet.
*
* For remote data stores that are filesystem-like, it's probably best to inherit BackupContainerFileSystem.
*/
class IBackupContainer {
public:
virtual void addref() = 0;
virtual void delref() = 0;
enum EMode { READONLY, WRITEONLY };
static std::vector<std::string> getURLFormats();
IBackupContainer() {}
virtual ~IBackupContainer() {}
// Create the container (if necessary)
// Create the container
virtual Future<Void> create() = 0;
// Open a named file in the container for reading (restore mode) or writing (backup mode)
virtual Future<Reference<IAsyncFile>> openFile(std::string name, EMode mode) = 0;
// Open a log file or range file for writing
virtual Future<Reference<IBackupFile>> writeLogFile(Version beginVersion, Version endVersion, int blockSize) = 0;
virtual Future<Reference<IBackupFile>> writeRangeFile(Version version, int blockSize) = 0;
// Returns whether or not a file exists in the container
virtual Future<bool> fileExists(std::string name) = 0;
// Write a KeyspaceSnapshotFile of range file names representing a full non overlapping
// snapshot of the key ranges this backup is targeting.
virtual Future<Void> writeKeyspaceSnapshotFile(std::vector<std::string> fileNames, int64_t totalBytes) = 0;
// Get a list of backup files in the container
virtual Future<std::vector<std::string>> listFiles() = 0;
// Open a file for read by name
virtual Future<Reference<IAsyncFile>> readFile(std::string name) = 0;
// Rename a file
virtual Future<Void> renameFile(std::string from, std::string to) = 0;
// Delete all data up to (but not including endVersion)
virtual Future<Void> expireData(Version endVersion) = 0;
// Delete entire container. During the process, if pNumDeleted is not null it will be
// updated with the count of deleted files so that progress can be seen.
virtual Future<Void> deleteContainer(int *pNumDeleted = nullptr) = 0;
// Uses the virtual methods to describe the backup contents
virtual Future<BackupDescription> describeBackup() = 0;
// Get exactly the files necessary to restore to targetVersion. Returns non-present if
// restore to given version is not possible.
virtual Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion) = 0;
// Get an IBackupContainer based on a container spec string
static Reference<IBackupContainer> openContainer(std::string url, std::string *error = nullptr);
};
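
For illustration, a restore-side caller now discovers files through the container rather than by interpreting raw file names; a minimal sketch using only the methods declared above (the function name and printed output are illustrative):

// Sketch: report the file set needed to restore to the latest restorable version.
ACTOR Future<Void> printRestoreSet(std::string url) {
	state Reference<IBackupContainer> bc = IBackupContainer::openContainer(url);
	state BackupDescription desc = wait(bc->describeBackup());
	printf("%s\n", desc.toString().c_str());
	if(!desc.maxRestorableVersion.present())
		return Void();                                    // nothing restorable yet
	Optional<RestorableFileSet> rs = wait(bc->getRestoreSet(desc.maxRestorableVersion.get()));
	if(rs.present()) {
		for(auto &rf : rs.get().ranges)
			printf("range %s (%lld bytes)\n", rf.fileName.c_str(), (long long)rf.fileSize);
		for(auto &lf : rs.get().logs)
			printf("log %s [%lld, %lld)\n", lf.fileName.c_str(), (long long)lf.beginVersion, (long long)lf.endVersion);
	}
	return Void();
}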
class BackupContainerBlobStore : public IBackupContainer, ReferenceCounted<BackupContainerBlobStore> {
public:
void addref() { return ReferenceCounted<BackupContainerBlobStore>::addref(); }
void delref() { return ReferenceCounted<BackupContainerBlobStore>::delref(); }
static const std::string META_BUCKET;
static std::string getURLFormat() { return BlobStoreEndpoint::getURLFormat(true); }
static Future<std::vector<std::string>> listBackupContainers(Reference<BlobStoreEndpoint> const &bs);
BackupContainerBlobStore(Reference<BlobStoreEndpoint> bstore, std::string name)
: m_bstore(bstore), m_bucketPrefix(name) {}
virtual ~BackupContainerBlobStore() { m_bucketCount.cancel(); }
// IBackupContainer methods
Future<Void> create();
Future<Reference<IAsyncFile>> openFile(std::string name, EMode mode);
Future<bool> fileExists(std::string name);
Future<Void> renameFile(std::string from, std::string to);
Future<std::vector<std::string>> listFiles();
Future<Void> listFilesStream(PromiseStream<BlobStoreEndpoint::ObjectInfo> results);
Future<Void> deleteContainer(int *pNumDeleted = NULL);
Future<std::string> containerInfo();
Future<int> getBucketCount();
std::string getBucketString(int num) { return format("%s_%d", m_bucketPrefix.c_str(), num); }
Future<std::string> getBucketForFile(std::string const &name);
Future<std::vector<std::string>> getBucketList();
Reference<BlobStoreEndpoint> m_bstore;
std::string m_bucketPrefix;
Future<int> m_bucketCount;
static Reference<IBackupContainer> openContainer(std::string url);
static std::vector<std::string> getURLFormats();
static std::vector<std::string> listContainers(std::string baseURL);
std::string getURL() const {
return URL;
}
static std::string lastOpenError;
private:
std::string URL;
};

File diff suppressed because it is too large

View File

@ -135,20 +135,32 @@ public:
}
Future<Optional<T>> get(Database cx, bool snapshot = false) const {
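// 'copy' is captured by value in the lambda below, so the transaction function holds its own copy of *this even if 'this' is destroyed before it runs.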
auto &copy = *this;
return runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
return get(tr, snapshot);
return copy.get(tr, snapshot);
});
}
Future<T> getD(Database cx, bool snapshot = false, T defaultValue = T()) const {
auto &copy = *this;
return runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
return copy.getD(tr, snapshot, defaultValue);
});
}
Future<T> getOrThrow(Database cx, bool snapshot = false, Error err = key_not_found()) const {
auto &copy = *this;
return runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
return getOrThrow(tr, snapshot, err);
return copy.getOrThrow(tr, snapshot, err);
});
}
@ -267,3 +279,53 @@ public:
Subspace space;
};
template <typename _ValueType>
class KeyBackedSet {
public:
KeyBackedSet(KeyRef key) : space(key) {}
typedef _ValueType ValueType;
typedef std::vector<ValueType> Values;
// If end is not present, the end of the set's subspace is used.
Future<Values> getRange(Reference<ReadYourWritesTransaction> tr, ValueType const &begin, Optional<ValueType> const &end, int limit, bool snapshot = false) const {
Subspace s = space; // 'this' could be invalid inside lambda
Key endKey = end.present() ? s.pack(Codec<ValueType>::pack(end.get())) : space.range().end;
return map(tr->getRange(KeyRangeRef(s.pack(Codec<ValueType>::pack(begin)), endKey), GetRangeLimits(limit), snapshot),
[s] (Standalone<RangeResultRef> const &kvs) -> Values {
Values results;
for(int i = 0; i < kvs.size(); ++i) {
results.push_back(Codec<ValueType>::unpack(s.unpack(kvs[i].key)));
}
return results;
});
}
Future<bool> exists(Reference<ReadYourWritesTransaction> tr, ValueType const &val, bool snapshot = false) const {
return map(tr->get(space.pack(Codec<ValueType>::pack(val)), snapshot), [](Optional<Value> const &val) -> bool {
return val.present();
});
}
// Returns the expectedSize of the set key
int insert(Reference<ReadYourWritesTransaction> tr, ValueType const &val) {
Key k = space.pack(Codec<ValueType>::pack(val));
tr->set(k, StringRef());
return k.expectedSize();
}
void erase(Reference<ReadYourWritesTransaction> tr, ValueType const &val) {
return tr->clear(space.pack(Codec<ValueType>::pack(val)));
}
void erase(Reference<ReadYourWritesTransaction> tr, ValueType const &begin, ValueType const &end) {
return tr->clear(KeyRangeRef(space.pack(Codec<ValueType>::pack(begin)), space.pack(Codec<ValueType>::pack(end))));
}
void clear(Reference<ReadYourWritesTransaction> tr) {
return tr->clear(space.range());
}
Subspace space;
};
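
As a usage sketch of the new KeyBackedSet<T> (the subspace key, helper names, and choice of Key as the value type are illustrative; a Codec<> specialization must exist for the chosen ValueType, as it does for the types BackupConfig uses):

// Sketch: a transactionally stored set of file-name Keys.
KeyBackedSet<Key> doneFiles(LiteralStringRef("\xff\x02/backupDoneFiles/"));

// Record a file as done; insert() returns the expectedSize of the written key.
void markFileDone(Reference<ReadYourWritesTransaction> tr, Key fileName) {
	doneFiles.insert(tr, fileName);
}

// Membership test; resolves to true if the packed key is present.
Future<bool> isFileDone(Reference<ReadYourWritesTransaction> tr, Key fileName) {
	return doneFiles.exists(tr, fileName);
}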

View File

@ -152,7 +152,6 @@ ClientKnobs::ClientKnobs(bool randomize) {
init( BLOBSTORE_READ_CACHE_BLOCKS_PER_FILE, 2 );
init( BLOBSTORE_MULTIPART_MAX_PART_SIZE, 20000000 );
init( BLOBSTORE_MULTIPART_MIN_PART_SIZE, 5242880 );
init( BLOBSTORE_BACKUP_BUCKETS, 100 );
// These are basically unlimited by default but can be used to reduce blob IO if needed
init( BLOBSTORE_REQUESTS_PER_SECOND, 200 );

View File

@ -158,7 +158,6 @@ public:
int BLOBSTORE_READ_CACHE_BLOCKS_PER_FILE;
int BLOBSTORE_MAX_SEND_BYTES_PER_SECOND;
int BLOBSTORE_MAX_RECV_BYTES_PER_SECOND;
int BLOBSTORE_BACKUP_BUCKETS;
int CONSISTENCY_CHECK_RATE_LIMIT;
int CONSISTENCY_CHECK_RATE_WINDOW;

View File

@ -21,8 +21,7 @@
#ifndef FDBCLIENT_STATUS_H
#define FDBCLIENT_STATUS_H
#include "json_spirit/json_spirit_writer_template.h"
#include "json_spirit/json_spirit_reader_template.h"
#include "../fdbrpc/JSONDoc.h"
struct StatusObject : json_spirit::mObject {
typedef json_spirit::mObject Map;
@ -71,291 +70,6 @@ static StatusObject makeMessage(const char *name, const char *description) {
return out;
}
// Removed JSONDoc definition suppressed here; it is identical to the new fdbrpc/JSONDoc.h shown below.
// Typedef to cover older code that was written when this class was only a reader and called StatusObjectReader
typedef JSONDoc StatusObjectReader;

View File

@ -220,6 +220,7 @@ public:
return task;
}
// Verify that the user-configured task verification key still has the user-specified value
ACTOR static Future<bool> taskVerify(Reference<TaskBucket> tb, Reference<ReadYourWritesTransaction> tr, Reference<Task> task) {
if (task->params.find(Task::reservedTaskParamValidKey) == task->params.end()) {
@ -503,6 +504,7 @@ public:
return false;
}
// Verify that the task's keys are still in the timeout space at the expected timeout prefix
ACTOR static Future<bool> isFinished(Reference<ReadYourWritesTransaction> tr, Reference<TaskBucket> taskBucket, Reference<Task> task) {
taskBucket->setOptions(tr);

View File

@ -24,7 +24,6 @@
#include "libb64/encode.h"
#include "sha1/SHA1.h"
#include "time.h"
#include "fdbclient/json_spirit/json_spirit_reader_template.h"
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp>
@ -66,12 +65,10 @@ BlobStoreEndpoint::BlobKnobs::BlobKnobs() {
read_cache_blocks_per_file = CLIENT_KNOBS->BLOBSTORE_READ_CACHE_BLOCKS_PER_FILE;
max_send_bytes_per_second = CLIENT_KNOBS->BLOBSTORE_MAX_RECV_BYTES_PER_SECOND;
max_recv_bytes_per_second = CLIENT_KNOBS->BLOBSTORE_MAX_SEND_BYTES_PER_SECOND;
buckets_to_span = CLIENT_KNOBS->BLOBSTORE_BACKUP_BUCKETS;
}
bool BlobStoreEndpoint::BlobKnobs::set(StringRef name, int value) {
#define TRY_PARAM(n, sn) if(name == LiteralStringRef(#n) || name == LiteralStringRef(#sn)) { n = value; return true; }
TRY_PARAM(buckets_to_span, bts);
TRY_PARAM(connect_tries, ct);
TRY_PARAM(connect_timeout, cto);
TRY_PARAM(max_connection_life, mcl);
@ -98,7 +95,6 @@ std::string BlobStoreEndpoint::BlobKnobs::getURLParameters() const {
static BlobKnobs defaults;
std::string r;
#define _CHECK_PARAM(n, sn) if(n != defaults. n) { r += format("%s%s=%d", r.empty() ? "" : "&", #sn, n); }
_CHECK_PARAM(buckets_to_span, bts);
_CHECK_PARAM(connect_tries, ct);
_CHECK_PARAM(connect_timeout, cto);
_CHECK_PARAM(max_connection_life, mcl);
@ -184,7 +180,7 @@ Reference<BlobStoreEndpoint> BlobStoreEndpoint::fromString(std::string const &ur
if(error != nullptr)
*error = err;
TraceEvent(SevWarnAlways, "BlobStoreEndpoint").detail("Description", err).detail("Format", getURLFormat()).detail("URL", url);
throw file_not_found();
throw backup_invalid_url();
}
}
@ -226,19 +222,23 @@ Future<Void> BlobStoreEndpoint::deleteObject(std::string const &bucket, std::str
}
ACTOR Future<Void> deleteBucket_impl(Reference<BlobStoreEndpoint> b, std::string bucket, int *pNumDeleted) {
state PromiseStream<BlobStoreEndpoint::ObjectInfo> resultStream;
state Future<Void> done = b->getBucketContentsStream(bucket, resultStream);
state PromiseStream<BlobStoreEndpoint::ListResult> resultStream;
state Future<Void> done = b->listBucketStream(bucket, resultStream);
state std::vector<Future<Void>> deleteFutures;
loop {
choose {
when(Void _ = wait(done)) {
break;
}
when(BlobStoreEndpoint::ObjectInfo info = waitNext(resultStream.getFuture())) {
if(pNumDeleted == nullptr)
deleteFutures.push_back(b->deleteObject(bucket, info.name));
else
deleteFutures.push_back(map(b->deleteObject(bucket, info.name), [this](Void) -> Void { ++*pNumDeleted; return Void(); }));
when(BlobStoreEndpoint::ListResult list = waitNext(resultStream.getFuture())) {
for(auto &object : list.objects) {
int *pNumDeletedCopy = pNumDeleted; // avoid capture of this
deleteFutures.push_back(map(b->deleteObject(bucket, object.name), [pNumDeletedCopy](Void) -> Void {
if(pNumDeletedCopy != nullptr)
++*pNumDeletedCopy;
return Void();
}));
}
}
}
}
@ -251,6 +251,18 @@ Future<Void> BlobStoreEndpoint::deleteBucket(std::string const &bucket, int *pNu
return deleteBucket_impl(Reference<BlobStoreEndpoint>::addRef(this), bucket, pNumDeleted);
}
ACTOR Future<Void> createBucket_impl(Reference<BlobStoreEndpoint> b, std::string bucket) {
std::string resource = std::string("/") + bucket;
HTTP::Headers headers;
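// A 409 (Conflict, e.g. the bucket already exists) is accepted as success so creating an existing bucket is not an error.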
Reference<HTTP::Response> r = wait(b->doRequest("PUT", resource, headers, NULL, 0, {200, 409}));
return Void();
}
Future<Void> BlobStoreEndpoint::createBucket(std::string const &bucket) {
return createBucket_impl(Reference<BlobStoreEndpoint>::addRef(this), bucket);
}
ACTOR Future<int64_t> objectSize_impl(Reference<BlobStoreEndpoint> b, std::string bucket, std::string object) {
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
@ -429,9 +441,16 @@ Future<Reference<HTTP::Response>> BlobStoreEndpoint::doRequest(std::string const
return doRequest_impl(Reference<BlobStoreEndpoint>::addRef(this), verb, resource, headers, pContent, contentLen, successCodes);
}
ACTOR Future<Void> getBucketContentsStream_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, PromiseStream<BlobStoreEndpoint::ObjectInfo> results) {
ACTOR Future<Void> listBucketStream_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, PromiseStream<BlobStoreEndpoint::ListResult> results, Optional<std::string> prefix, Optional<char> delimiter) {
// Request 1000 keys at a time, the maximum allowed
state std::string resource = std::string("/") + bucket + "/?max-keys=1000&marker=";
state std::string resource = "/";
resource.append(bucket);
resource.append("/?max-keys=1000");
if(prefix.present())
resource.append("&prefix=").append(HTTP::urlEncode(prefix.get()));
if(delimiter.present())
resource.append("&delimiter=").append(HTTP::urlEncode(std::string(delimiter.get(), 1)));
resource.append("&marker=");
state std::string lastFile;
state bool more = true;
@ -440,32 +459,31 @@ ACTOR Future<Void> getBucketContentsStream_impl(Reference<BlobStoreEndpoint> bst
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", resource + HTTP::urlEncode(lastFile), headers, NULL, 0, {200}));
try {
BlobStoreEndpoint::ListResult result;
// Parse the json assuming it is valid and contains the right stuff. If any exceptions are thrown, throw http_bad_response
json_spirit::Value json;
json_spirit::mValue json;
json_spirit::read_string(r->content, json);
for(auto &i : json.get_obj()) {
if(i.name_ == "truncated") {
more = i.value_.get_bool();
}
else if(i.name_ == "results") {
BlobStoreEndpoint::ObjectInfo info;
info.bucket = bucket;
for(auto &o : i.value_.get_array()) {
info.size = -1;
info.name.clear();
for(auto &f : o.get_obj()) {
if(f.name_ == "size")
info.size = f.value_.get_int();
else if(f.name_ == "key")
info.name = f.value_.get_str();
}
if(info.size >= 0 && !info.name.empty()) {
lastFile = info.name;
results.send(std::move(info));
JSONDoc doc(json);
doc.tryGet("truncated", more);
if(doc.has("results")) {
for(auto &jsonObject : doc.at("results").get_array()) {
JSONDoc objectDoc(jsonObject);
BlobStoreEndpoint::ObjectInfo object;
objectDoc.get("size", object.size);
objectDoc.get("key", object.name);
result.objects.push_back(std::move(object));
}
}
if(doc.has("CommonPrefixes")) {
for(auto &jsonObject : doc.at("CommonPrefixes").get_array()) {
JSONDoc objectDoc(jsonObject);
std::string prefix;
objectDoc.get("Prefix", prefix);
result.commonPrefixes.push_back(std::move(prefix));
}
}
results.send(result);
} catch(Error &e) {
throw http_bad_response();
}
@ -474,29 +492,30 @@ ACTOR Future<Void> getBucketContentsStream_impl(Reference<BlobStoreEndpoint> bst
return Void();
}
Future<Void> BlobStoreEndpoint::getBucketContentsStream(std::string const &bucket, PromiseStream<BlobStoreEndpoint::ObjectInfo> results) {
return getBucketContentsStream_impl(Reference<BlobStoreEndpoint>::addRef(this), bucket, results);
Future<Void> BlobStoreEndpoint::listBucketStream(std::string const &bucket, PromiseStream<ListResult> results, Optional<std::string> prefix, Optional<char> delimiter) {
return listBucketStream_impl(Reference<BlobStoreEndpoint>::addRef(this), bucket, results, prefix, delimiter);
}
ACTOR Future<BlobStoreEndpoint::BucketContentsT> getBucketContents_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket) {
state BlobStoreEndpoint::BucketContentsT results;
state PromiseStream<BlobStoreEndpoint::ObjectInfo> resultStream;
state Future<Void> done = bstore->getBucketContentsStream(bucket, resultStream);
ACTOR Future<BlobStoreEndpoint::ListResult> listBucket_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, Optional<std::string> prefix, Optional<char> delimiter) {
state BlobStoreEndpoint::ListResult results;
state PromiseStream<BlobStoreEndpoint::ListResult> resultStream;
state Future<Void> done = bstore->listBucketStream(bucket, resultStream, prefix, delimiter);
loop {
choose {
when(Void _ = wait(done)) {
break;
}
when(BlobStoreEndpoint::ObjectInfo info = waitNext(resultStream.getFuture())) {
results.push_back(info);
when(BlobStoreEndpoint::ListResult info = waitNext(resultStream.getFuture())) {
results.commonPrefixes.insert(results.commonPrefixes.end(), info.commonPrefixes.begin(), info.commonPrefixes.end());
results.objects.insert(results.objects.end(), info.objects.begin(), info.objects.end());
}
}
}
return results;
}
Future<BlobStoreEndpoint::BucketContentsT> BlobStoreEndpoint::getBucketContents(std::string const &bucket) {
return getBucketContents_impl(Reference<BlobStoreEndpoint>::addRef(this), bucket);
Future<BlobStoreEndpoint::ListResult> BlobStoreEndpoint::listBucket(std::string const &bucket, Optional<std::string> prefix, Optional<char> delimiter) {
return listBucket_impl(Reference<BlobStoreEndpoint>::addRef(this), bucket, prefix, delimiter);
}
std::string BlobStoreEndpoint::hmac_sha1(std::string const &msg) {
@ -639,7 +658,9 @@ ACTOR Future<int> readObject_impl(Reference<BlobStoreEndpoint> bstore, std::stri
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
headers["Range"] = format("bytes=%lld-%lld", offset, offset + length - 1);
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", resource, headers, NULL, 0, {200, 206}));
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", resource, headers, NULL, 0, {200, 206, 404}));
if(r->code == 404)
throw file_not_found();
if(r->contentLen != r->content.size()) // Double check that this wasn't a header-only response, probably unnecessary
throw io_error();
// Copy the output bytes, server could have sent more or less bytes than requested so copy at most length bytes

View File

@ -26,7 +26,7 @@
#include "fdbclient/Knobs.h"
#include "IRateControl.h"
#include "HTTP.h"
#include "fdbclient/json_spirit/json_spirit_writer_template.h"
#include "JSONDoc.h"
// Representation of all the things you need to connect to a blob store instance with some credentials.
// Reference counted because a very large number of them could be needed.
@ -63,8 +63,7 @@ public:
read_ahead_blocks,
read_cache_blocks_per_file,
max_send_bytes_per_second,
max_recv_bytes_per_second,
buckets_to_span;
max_recv_bytes_per_second;
bool set(StringRef name, int value);
std::string getURLParameters() const;
static std::vector<std::string> getKnobDescriptions() {
@ -85,8 +84,7 @@ public:
"read_ahead_blocks (or rab) Number of blocks to read ahead of requested offset.",
"read_cache_blocks_per_file (or rcb) Size of the read cache for a file in blocks.",
"max_send_bytes_per_second (or sbps) Max send bytes per second for all requests combined.",
"max_recv_bytes_per_second (or rbps) Max receive bytes per second for all requests combined (NOT YET USED).",
"buckets_to_span (or bts) Number of buckets that a new backup should distribute over."
"max_recv_bytes_per_second (or rbps) Max receive bytes per second for all requests combined (NOT YET USED)."
};
}
};
@ -148,18 +146,22 @@ public:
// Every blob store interaction should ultimately go through this function
Future<Reference<HTTP::Response>> doRequest(std::string const &verb, std::string const &resource, const HTTP::Headers &headers, UnsentPacketQueue *pContent, int contentLen, std::set<unsigned int> successCodes);
struct ObjectInfo {
std::string bucket;
std::string name;
int64_t size;
};
struct ListResult {
std::vector<std::string> commonPrefixes;
std::vector<ObjectInfo> objects;
};
// Get bucket contents via a stream, since listing large buckets will take many serial blob requests
Future<Void> getBucketContentsStream(std::string const &bucket, PromiseStream<ObjectInfo> results);
Future<Void> listBucketStream(std::string const &bucket, PromiseStream<ListResult> results, Optional<std::string> prefix = {}, Optional<char> delimiter = {});
// Get a list of the files in a bucket
typedef std::vector<ObjectInfo> BucketContentsT;
Future<BucketContentsT> getBucketContents(std::string const &bucket);
Future<ListResult> listBucket(std::string const &bucket, Optional<std::string> prefix = {}, Optional<char> delimiter = {});
// Check if an object exists in a bucket
Future<bool> objectExists(std::string const &bucket, std::string const &object);
@ -179,6 +181,9 @@ public:
// a deletion of an object completes.
Future<Void> deleteBucket(std::string const &bucket, int *pNumDeleted = NULL);
// Create a bucket if it does not already exist.
Future<Void> createBucket(std::string const &bucket);
// Useful methods for working with tiny files
Future<std::string> readEntireFile(std::string const &bucket, std::string const &object);
Future<Void> writeEntireFile(std::string const &bucket, std::string const &object, std::string const &content);
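
As an illustration of the new prefix/delimiter options, one level of a pseudo-directory hierarchy can be listed by combining a prefix with a '/' delimiter (bucket and prefix names below are illustrative):

// Sketch: list the immediate "subdirectories" and objects under data/ in a bucket.
ACTOR Future<Void> listDataFolders(Reference<BlobStoreEndpoint> bstore, std::string bucket) {
	BlobStoreEndpoint::ListResult result = wait(bstore->listBucket(bucket, std::string("data/"), '/'));
	for(auto &prefix : result.commonPrefixes)
		printf("prefix: %s\n", prefix.c_str());
	for(auto &object : result.objects)
		printf("object: %s (%lld bytes)\n", object.name.c_str(), (long long)object.size);
	return Void();
}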

fdbrpc/JSONDoc.h (new file, +310 lines)
View File

@ -0,0 +1,310 @@
/*
* JSONDoc.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "json_spirit/json_spirit_writer_template.h"
#include "json_spirit/json_spirit_reader_template.h"
// JSONDoc is a convenient reader/writer class for manipulating JSON documents using "paths".
// Access is done using a "path", which is a string of dot-separated
// substrings representing successively deeper keys found in nested
// JSON objects within the top level object
//
// Most methods are read-only with respect to the source JSON object.
// The only modifying methods are create(), put(), subDoc(), and mergeInto()
//
// JSONDoc maintains some state which is the JSON value that was found during the most recent
// *successful* path lookup.
//
// Examples:
// JSONDoc r(some_obj);
//
// // See if JSON doc path a.b.c exists
// bool exists = r.has("a.b.c");
//
// // See if JSON doc path a.b.c exists, if it does then assign value to x. Throws if path exists but T is not compatible.
// T x;
// bool exists = r.has("a.b.c", x);
//
// // This way you can chain things like this:
// bool is_two = r.has("a.b.c", x) && x == 2;
//
// // Alternatively, you can avoid the temp var by making use of the last() method which returns a reference
// // to the JSON value at the last successfully found path that has() has seen.
// bool is_int = r.has("a.b.c") && r.last().type == json_spirit::int_type;
// bool is_two = r.has("a.b.c") && r.last().get_int() == 2;
//
// // The familiar at() method also exists but now supports the same path concept.
// // It will throw in the same circumstances as the original method
// int x = r.at("a.b.c").get_int();
//
// // If you wish to access an element with the dot character within its name (e.g., "hostname.example.com"),
// // you can do so by setting the "split" flag to false in either the "has" or "get" methods. The example
// // below will look for the key "hostname.example.com" as a subkey of the path "a.b.c" (or, more
// // precisely, it will look to see if r.has("a").has("b").has("c").has("hostname.example.com", false)).
// bool exists = r.has("a.b.c").has("hostname.example.com", false);
//
// // And the familiar operator[] interface exists as well, however only as a synonym for at()
// // because this class is only for reading. Using operator [] will not auto-create null things.
// // The following would throw if a.b.c did not exist, or if it was not an int.
// int x = r["a.b.c"].get_int();
struct JSONDoc {
JSONDoc() : pObj(NULL) {}
// Construction from const json_spirit::mObject, trivial and will never throw.
// Resulting JSONDoc will not allow modifications.
JSONDoc(const json_spirit::mObject &o) : pObj(&o), wpObj(NULL) {}
// Construction from json_spirit::mObject. Allows modifications.
JSONDoc(json_spirit::mObject &o) : pObj(&o), wpObj(&o) {}
// Construction from const json_spirit::mValue (which is a Variant type) which will try to
// convert it to an mObject. This will throw if that fails, just as it would
// if the caller called get_obj() itself and used the previous constructor instead.
JSONDoc(const json_spirit::mValue &v) : pObj(&v.get_obj()), wpObj(NULL) {}
// Construction from non-const json_spirit::mValue - will convert the mValue to
// an object if it isn't already and then attach to it.
JSONDoc(json_spirit::mValue &v) {
if(v.type() != json_spirit::obj_type)
v = json_spirit::mObject();
wpObj = &v.get_obj();
pObj = wpObj;
}
// Returns whether or not a "path" exists.
// Returns true if all elements along path exist
// Returns false if any elements along the path are MISSING
// Will throw if a non-terminating path element exists BUT is not a JSON Object.
// If the "split" flag is set to "false", then this skips the splitting of a
// path into on the "dot" character.
// When a path is found, pLast is updated.
bool has(std::string path, bool split=true) {
if (pObj == NULL)
return false;
if (path.empty())
return false;
size_t start = 0;
const json_spirit::mValue *curVal = NULL;
while (start < path.size())
{
// If a path segment is found then curVal must be an object
size_t dot;
if (split) {
dot = path.find_first_of('.', start);
if (dot == std::string::npos)
dot = path.size();
} else {
dot = path.size();
}
std::string key = path.substr(start, dot - start);
// Get pointer to the current Object that the key has to be in
// This will throw if the value is not an Object
const json_spirit::mObject *curObj = curVal ? &curVal->get_obj() : pObj;
// Make sure key exists, if not then return false
if (!curObj->count(key))
return false;
// Advance curVal
curVal = &curObj->at(key);
// Advance start position in path
start = dot + 1;
}
pLast = curVal;
return true;
}
// Creates the given path (forcing Objects to exist along its depth, replacing whatever else might have been there)
// and returns a reference to the Value at that location.
json_spirit::mValue & create(std::string path, bool split=true) {
if (wpObj == NULL || path.empty())
throw std::runtime_error("JSON Object not writable or bad JSON path");
size_t start = 0;
json_spirit::mValue *curVal = nullptr;
while (start < path.size())
{
// Get next path segment name
size_t dot;
if (split) {
dot = path.find_first_of('.', start);
if (dot == std::string::npos)
dot = path.size();
} else {
dot = path.size();
}
std::string key = path.substr(start, dot - start);
if(key.empty())
throw std::runtime_error("invalid JSON path");
// Get/create pointer to the current Object that the key has to be in
// If curVal is defined then force it to be an Object
json_spirit::mObject *curObj;
if(curVal != nullptr) {
if(curVal->type() != json_spirit::obj_type)
*curVal = json_spirit::mObject();
curObj = &curVal->get_obj();
}
else // Otherwise start with the object *this is writing to
curObj = wpObj;
// Make sure key exists; if not, create it with a null value
if (!curObj->count(key))
(*curObj)[key] = json_spirit::mValue();
// Advance curVal
curVal = &((*curObj)[key]);
// Advance start position in path
start = dot + 1;
}
return *curVal;
}
// Creates the path given, puts a value at it, and returns a reference to the value
template<typename T>
T & put(std::string path, const T & value, bool split=true) {
json_spirit::mValue &v = create(path, split);
v = value;
return v.get_value<T>();
}
// Ensures that an Object exists at path and returns a JSONDoc that writes to it.
JSONDoc subDoc(std::string path, bool split=true) {
json_spirit::mValue &v = create(path, split);
if(v.type() != json_spirit::obj_type)
v = json_spirit::mObject();
return JSONDoc(v.get_obj());
}
// Apply a merge operation to two values. Works for int, double, and string
template <typename T>
static json_spirit::mObject mergeOperator(const std::string &op, const json_spirit::mObject &op_a, const json_spirit::mObject &op_b, T const &a, T const &b) {
if(op == "$max")
return {{op, std::max<T>(a, b)}};
if(op == "$min")
return {{op, std::min<T>(a, b)}};
if(op == "$sum")
return {{op, a + b}};
throw std::exception();
}
// This is just a convenience function to make calling mergeOperator look cleaner
template <typename T>
static json_spirit::mObject mergeOperatorWrapper(const std::string &op, const json_spirit::mObject &op_a, const json_spirit::mObject &op_b, const json_spirit::mValue &a, const json_spirit::mValue &b) {
return mergeOperator<T>(op, op_a, op_b, a.get_value<T>(), b.get_value<T>());
}
static inline std::string getOperator(const json_spirit::mObject &obj) {
for(auto &k : obj)
if(!k.first.empty() && k.first[0] == '$')
return k.first;
return std::string();
}
// Merge src into dest, applying merge operators
static void mergeInto(json_spirit::mObject &dst, const json_spirit::mObject &src);
static void mergeValueInto(json_spirit::mValue &d, const json_spirit::mValue &s);
// Remove any merge operators that never met any mates.
static void cleanOps(json_spirit::mObject &obj);
void cleanOps() {
if(wpObj == nullptr)
throw std::runtime_error("JSON Object not writable");
return cleanOps(*wpObj);
}
void absorb(const JSONDoc &doc) {
if(wpObj == nullptr)
throw std::runtime_error("JSON Object not writable");
if(doc.pObj == nullptr)
throw std::runtime_error("JSON Object not readable");
mergeInto(*wpObj, *doc.pObj);
}
// Returns whether or not a "path" exists.
// Returns true if all elements along path exist
// Returns false if any elements along the path are MISSING
// Sets out to the value of the thing that path refers to
// Will throw if a non-terminating path element exists BUT is not a JSON Object.
// Will throw if all elements along path exists but T is an incompatible type
template <typename T> bool get(const std::string path, T &out, bool split=true) {
bool r = has(path, split);
if (r)
out = pLast->get_value<T>();
return r;
}
// For convenience, wraps get() in a try/catch and returns false UNLESS the path existed and was a compatible type.
template <typename T> bool tryGet(const std::string path, T &out, bool split=true) {
try { return get(path, out, split); } catch(...) {}
return false;
}
const json_spirit::mValue & at(const std::string path, bool split=true) {
if (has(path, split))
return last();
throw std::runtime_error("JSON path doesn't exist");
}
const json_spirit::mValue & operator[](const std::string path) {
return at(path);
}
const json_spirit::mValue & last() const { return *pLast; }
bool valid() const { return pObj != NULL; }
const json_spirit::mObject & obj() {
// This dummy object is necessary to make working with obj() easier when this does not currently
// point to a valid mObject. valid() can be called to explicitly check for this scenario, but
// calling obj() at least will not seg fault and instead return a const reference to an empty mObject.
// This is very useful when iterating using obj() to access the underlying mObject.
static const json_spirit::mObject dummy;
return pObj ? *pObj : dummy;
}
// Return reference to writeable underlying mObject but only if *this was initialized with a writeable value or object
json_spirit::mObject & wobj() {
ASSERT(wpObj != nullptr);
return *wpObj;
}
// This is the version used to represent 'now' for use by the $expires operator.
// By default, nothing will expire and it is up to the user of JSONDoc to update this value if
// it is intended to be used.
// This is slightly hackish but otherwise the JSON merge functions would require a Transaction.
static uint64_t expires_reference_version;
private:
const json_spirit::mObject *pObj;
// Writeable pointer to the same object. Will be NULL if initialized from a const object.
json_spirit::mObject *wpObj;
const json_spirit::mValue *pLast;
};

View File

@ -162,36 +162,42 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
state int resultWait = wait(backupAgent->waitBackup(cx, backupTag.tagName, false));
UidAndAbortedFlagT uidFlag = wait(backupTag.getOrThrow(cx));
state UID logUid = uidFlag.first;
state std::string lastBackupContainer = wait(BackupConfig(logUid).backupContainer().getOrThrow(cx, false, backup_unneeded()));
state Reference<IBackupContainer> lastBackupContainer = wait(BackupConfig(logUid).backupContainer().getD(cx));
state std::string restorableFile = joinPath(lastBackupContainer, "restorable");
TraceEvent("BARW_lastBackupContainer", randomID).detail("backupTag", printable(tag)).detail("lastBackupContainer", lastBackupContainer)
.detail("logUid", logUid).detail("waitStatus", resultWait).detail("restorable", restorableFile);
state bool restorable = false;
if(lastBackupContainer) {
BackupDescription desc = wait(lastBackupContainer->describeBackup());
restorable = desc.maxRestorableVersion.present();
}
TraceEvent("BARW_lastBackupContainer", randomID)
.detail("backupTag", printable(tag))
.detail("lastBackupContainer", lastBackupContainer ? lastBackupContainer->getURL() : "")
.detail("logUid", logUid).detail("waitStatus", resultWait).detail("restorable", restorable);
// Do not check the backup, if aborted
if (resultWait == BackupAgentBase::STATE_ABORTED) {
}
// Ensure that a backup container was found
else if (lastBackupContainer.empty()) {
else if (!lastBackupContainer) {
TraceEvent("BARW_missingBackupContainer", randomID).detail("logUid", logUid).detail("backupTag", printable(tag)).detail("waitStatus", resultWait);
printf("BackupCorrectnessMissingBackupContainer tag: %s status: %d\n", printable(tag).c_str(), resultWait);
}
// Ensure that the restorable file is present
// Check that backup is restorable
else {
bool rfExists = wait(IBackupContainer::openContainer(lastBackupContainer)->fileExists(restorableFile));
if(!rfExists) {
TraceEvent("BARW_missingBackupRestoreFile", randomID).detail("logUid", logUid).detail("backupTag", printable(tag))
.detail("backupFolder", lastBackupContainer).detail("restorable", restorableFile).detail("waitStatus", resultWait);
printf("BackupCorrectnessMissingRestorable: %s tag: %s\n", restorableFile.c_str(), printable(tag).c_str());
if(!restorable) {
TraceEvent("BARW_notRestorable", randomID).detail("logUid", logUid).detail("backupTag", printable(tag))
.detail("backupFolder", lastBackupContainer->getURL()).detail("waitStatus", resultWait);
printf("BackupCorrectnessNotRestorable: tag: %s\n", printable(tag).c_str());
}
}
// Abort the backup, if not the first backup because the second backup may have aborted the backup by now
if (startDelay) {
TraceEvent("BARW_doBackupAbortBackup2", randomID).detail("tag", printable(tag))
.detail("waitStatus", resultWait).detail("lastBackupContainer", lastBackupContainer).detail("restorable", restorableFile);
.detail("waitStatus", resultWait)
.detail("lastBackupContainer", lastBackupContainer ? lastBackupContainer->getURL() : "")
.detail("restorable", restorable);
Void _ = wait(backupAgent->abortBackup(cx, tag.toString()));
}
else {
@ -309,7 +315,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
state KeyBackedTag keyBackedTag = makeBackupTag(self->backupTag.toString());
UidAndAbortedFlagT uidFlag = wait(keyBackedTag.getOrThrow(cx));
state UID logUid = uidFlag.first;
state std::string lastBackupContainer = wait(BackupConfig(logUid).backupContainer().getOrThrow(cx));
state Reference<IBackupContainer> lastBackupContainer = wait(BackupConfig(logUid).backupContainer().getD(cx));
// Occasionally start yet another backup that might still be running when we restore
if (!self->locked && BUGGIFY) {
@ -327,9 +333,9 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
TEST(!startRestore.isReady()); //Restore starts at specified time
Void _ = wait(startRestore);
if ((lastBackupContainer.size()) && (self->performRestore)) {
if (lastBackupContainer && self->performRestore) {
if (g_random->random01() < 0.5) {
Void _ = wait(attemptDirtyRestore(self, cx, &backupAgent, StringRef(lastBackupContainer), randomID));
Void _ = wait(attemptDirtyRestore(self, cx, &backupAgent, StringRef(lastBackupContainer->getURL()), randomID));
}
Void _ = wait(runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Void> {
for (auto &kvrange : self->backupRanges)
@ -338,7 +344,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
}));
// restore database
TraceEvent("BARW_restore", randomID).detail("lastBackupContainer", lastBackupContainer).detail("restoreAfter", self->restoreAfter).detail("backupTag", printable(self->backupTag));
TraceEvent("BARW_restore", randomID).detail("lastBackupContainer", lastBackupContainer->getURL()).detail("restoreAfter", self->restoreAfter).detail("backupTag", printable(self->backupTag));
state std::vector<Future<Version>> restores;
state std::vector<Standalone<StringRef>> restoreTags;
@ -348,7 +354,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
auto range = self->backupRanges[restoreIndex];
Standalone<StringRef> restoreTag(self->backupTag.toString() + "_" + std::to_string(restoreIndex));
restoreTags.push_back(restoreTag);
restores.push_back(backupAgent.restore(cx, restoreTag, KeyRef(lastBackupContainer), true, -1, true, range, Key(), Key(), self->locked));
restores.push_back(backupAgent.restore(cx, restoreTag, KeyRef(lastBackupContainer->getURL()), true, -1, true, range, Key(), Key(), self->locked));
}
// Sometimes kill and restart the restore
@ -363,7 +369,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
tr->clear(self->backupRanges[restoreIndex]);
return Void();
}));
restores[restoreIndex] = backupAgent.restore(cx, restoreTags[restoreIndex], KeyRef(lastBackupContainer), true, -1, true, self->backupRanges[restoreIndex], Key(), Key(), self->locked);
restores[restoreIndex] = backupAgent.restore(cx, restoreTags[restoreIndex], KeyRef(lastBackupContainer->getURL()), true, -1, true, self->backupRanges[restoreIndex], Key(), Key(), self->locked);
}
}
}

View File

@ -1887,6 +1887,21 @@ std::vector<std::string> listFiles( std::string const& directory, std::string co
std::vector<std::string> listDirectories( std::string const& directory ) {
return findFiles( directory, "", &acceptDirectory );
}
void findFilesRecursively(std::string path, std::vector<std::string> &out) {
// Add files to output, prefixing path
std::vector<std::string> files = platform::listFiles(path);
for(auto const &f : files)
out.push_back(joinPath(path, f));
// Recurse for directories
std::vector<std::string> directories = platform::listDirectories(path);
for(auto const &dir : directories) {
if(dir != "." && dir != "..")
findFilesRecursively(joinPath(path, dir), out);
}
};
}; // namespace platform
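
A quick usage sketch of the new helper (the directory path is illustrative):

// Sketch: collect every file under a local backup directory, paths prefixed with the directory.
void printLocalBackupFiles() {
	std::vector<std::string> files;
	platform::findFilesRecursively("/var/backups/fdb", files);
	for(auto &f : files)
		printf("%s\n", f.c_str());
}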

View File

@ -330,6 +330,8 @@ std::vector<std::string> listFiles( std::string const& directory, std::string co
// returns directory names relative to directory
std::vector<std::string> listDirectories( std::string const& directory );
void findFilesRecursively(std::string path, std::vector<std::string> &out);
// Tag the given file as "temporary", i.e. not really needing commits to disk
void makeTemporary( const char* filename );

View File

@ -169,12 +169,14 @@ ERROR( restore_error, 2301, "Restore error")
ERROR( backup_duplicate, 2311, "Backup duplicate request")
ERROR( backup_unneeded, 2312, "Backup unneeded request")
ERROR( backup_bad_block_size, 2313, "Backup file block size too small")
ERROR( backup_invalid_url, 2314, "Backup Container URL invalid")
ERROR( backup_invalid_info, 2315, "Backup Container info invalid")
ERROR( restore_invalid_version, 2361, "Invalid restore version")
ERROR( restore_corrupted_data, 2362, "Corrupted backup data")
ERROR( restore_missing_data, 2363, "Missing backup data")
ERROR( restore_duplicate_tag, 2364, "Restore duplicate request")
ERROR( restore_unknown_tag, 2365, "Restore tag does not exist")
ERROR( restore_unknown_file_type, 2366, "Unknown backup file type")
ERROR( restore_unknown_file_type, 2366, "Unknown backup/restore file type")
ERROR( restore_unsupported_file_version, 2367, "Unsupported backup file version")
ERROR( restore_bad_read, 2368, "Unexpected number of bytes read")
ERROR( restore_corrupted_data_padding, 2369, "Backup file has unexpected padding bytes")