IBackupContainer has been rewritten to be a logical interface for storing, reading, deleting, expiring, and querying backup data. The details of how the data is organized or stored are now hidden from users of the interface. Both the local and blob store containers have been rewritten; the key changes are a multi-level directory structure and the elimination of temporary files and pseudo-symlinks in the blob store implementation. This refactor has a large impact radius: the previous backup container was just a thin wrapper that presented a single-level list of files and offered no methods for managing or interpreting the file structure, so all of that logic was spread across other parts of the code base. That made moving to the new blob store schema very messy, and without this refactor further changes would only get worse.

Several backup tasks have been cleaned up and simplified because they no longer need to manage the ‘raw’ structure of the backup. The addition of IBackupFile and its finish() method simplified the log and range writer tasks. BlobStoreEndpoint was updated to support bucket creation (now required) and the bucket-listing prefix/delimiter options needed to find common prefixes. Added the KeyBackedSet<T> type. Moved JSONDoc to its own header. Added platform::findFilesRecursively().
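
As a rough sketch of what a writer task now does through the new interface (illustrative only; the helper name, block size, and lack of error handling are not the actual task code):

// Sketch: write one range file and publish it in a keyspace snapshot.
ACTOR Future<Void> writeOneRangeFile(std::string url, Version v, Standalone<StringRef> blockData) {
	state Reference<IBackupContainer> bc = IBackupContainer::openContainer(url);
	Void _ = wait(bc->create());                          // create the container
	state Reference<IBackupFile> f = wait(bc->writeRangeFile(v, 1 << 20));
	Void _ = wait(f->append(blockData));                  // at most one append outstanding at a time
	Void _ = wait(f->finish());                           // file becomes durable/visible in the container
	std::vector<std::string> rangeFiles;
	rangeFiles.push_back(f->getFileName());
	Void _ = wait(bc->writeKeyspaceSnapshotFile(rangeFiles, f->size()));
	return Void();
}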

Still to do: update the command line tool to use the new IBackupContainer interface, and fix bugs in Restore startup.
Stephen Atherton 2017-11-14 23:33:17 -08:00
parent 45fa3680fa
commit 3dfaf13b67
17 changed files with 1788 additions and 1741 deletions

View File

@ -30,6 +30,7 @@
#include "KeyBackedTypes.h"
#include <ctime>
#include <climits>
#include "BackupContainer.h"
class BackupAgentBase : NonCopyable {
public:
@ -262,6 +263,7 @@ public:
Future<std::string> getStatus(Database cx, int errorLimit, std::string tagName);
Future<Version> getLastRestorable(Reference<ReadYourWritesTransaction> tr, Key tagName);
void setLastRestorable(Reference<ReadYourWritesTransaction> tr, Key tagName, Version version);
// stopWhenDone will return when the backup is stopped, if enabled. Otherwise, it
// will return when the backup directory is restorable.
@ -269,14 +271,6 @@ public:
static Future<std::string> getBackupInfo(std::string backupContainer, Version* defaultVersion = NULL);
static std::string getTempFilename();
// Data(key ranges) and Log files will have their file size in the name because it is not at all convenient
// to fetch filesizes from either of the current BackupContainer implementations. LocalDirectory requires
// querying each file separately, and Blob Store doesn't support renames so the apparent log and data files
// are actually a kind of symbolic link so to get the size of the final file it would have to be read.
static std::string getDataFilename(Version version, int64_t size, int blockSize);
static std::string getLogFilename(Version beginVer, Version endVer, int64_t size, int blockSize);
Future<int64_t> getTaskCount(Reference<ReadYourWritesTransaction> tr) { return taskBucket->getTaskCount(tr); }
Future<int64_t> getTaskCount(Database cx) { return taskBucket->getTaskCount(cx); }
Future<Void> watchTaskCount(Reference<ReadYourWritesTransaction> tr) { return taskBucket->watchTaskCount(tr); }
@ -562,14 +556,39 @@ protected:
Subspace configSpace;
};
template<> inline Tuple Codec<Reference<IBackupContainer>>::pack(Reference<IBackupContainer> const &bc) {
return Tuple().append(StringRef(bc->getURL()));
}
template<> inline Reference<IBackupContainer> Codec<Reference<IBackupContainer>>::unpack(Tuple const &val) {
return IBackupContainer::openContainer(val.getString(0).toString());
}
class BackupConfig : public KeyBackedConfig {
public:
BackupConfig(UID uid = UID()) : KeyBackedConfig(fileBackupPrefixRange.begin, uid) {}
BackupConfig(Reference<Task> task) : KeyBackedConfig(fileBackupPrefixRange.begin, task) {}
// rangeFileMap maps a keyrange file's End to its Begin and Filename
typedef std::pair<Key, Key> KeyAndFilenameT;
typedef KeyBackedMap<Key, KeyAndFilenameT> RangeFileMapT;
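// A range file is now described by a RangeSlice (its Begin key, version, file name, and size);
// rangeFileMap maps each range's End key to its RangeSlice.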
struct RangeSlice {
Key begin;
Version version;
std::string fileName;
int64_t fileSize;
Tuple pack() const {
return Tuple().append(begin).append(version).append(StringRef(fileName)).append(fileSize);
}
static RangeSlice unpack(Tuple const &t) {
RangeSlice r;
int i = 0;
r.begin = t.getString(i++);
r.version = t.getInt(i++);
r.fileName = t.getString(i++).toString();
r.fileSize = t.getInt(i++);
return r;
}
};
typedef KeyBackedMap<Key, RangeSlice> RangeFileMapT;
RangeFileMapT rangeFileMap() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
@ -586,10 +605,15 @@ public:
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
KeyBackedProperty<std::string> backupContainer() {
KeyBackedProperty<Reference<IBackupContainer>> backupContainer() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));
}
// Get the backup container URL only without creating a backup container instance.
KeyBackedProperty<Value> backupContainerURL() {
return configSpace.pack(LiteralStringRef("backupContainer"));
}
// Stop differential logging if already started or don't start after completing KV ranges
KeyBackedProperty<bool> stopWhenDone() {
return configSpace.pack(LiteralStringRef(__FUNCTION__));

View File

@ -364,6 +364,7 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RangeResultWithVersi
begin = firstGreaterThan(values.end()[-1].key);
if (!values.more && !limits.isReached()) {
if(terminator)
results.sendError(end_of_stream());
return Void();
}
@ -456,6 +457,7 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RCGroup> results, Fu
results.send(rcGroup);
}
if(terminator)
results.sendError(end_of_stream());
return Void();
}

File diff suppressed because it is too large

View File

@ -23,71 +23,149 @@
#include "flow/flow.h"
#include "fdbrpc/IAsyncFile.h"
#include "fdbrpc/BlobStore.h"
#include "FDBTypes.h"
#include <vector>
// Class representing a container for backup files, such as a mounted directory or a remote filesystem.
// Append-only file interface for writing backup data
// TODO: Move the log file and range file format encoder/decoder classes to this file, probably as part of IBackupFile.
class IBackupFile {
public:
IBackupFile(std::string fileName) : m_fileName(fileName), m_offset(0) {}
virtual ~IBackupFile() {}
// Backup files are append-only and cannot have more than 1 append outstanding at once.
virtual Future<Void> append(StringRef data) = 0;
virtual Future<Void> finish() = 0;
inline std::string getFileName() const {
return m_fileName;
}
inline int64_t size() const {
return m_offset;
}
virtual void addref() = 0;
virtual void delref() = 0;
protected:
std::string m_fileName;
int64_t m_offset;
};
// Structures for various backup components
struct LogFile {
Version beginVersion;
Version endVersion;
uint32_t blockSize;
std::string fileName;
int64_t fileSize;
// Order by beginVersion, break ties with endVersion
bool operator< (const LogFile &rhs) const {
return beginVersion == rhs.beginVersion ? endVersion < rhs.endVersion : beginVersion < rhs.beginVersion;
}
};
struct RangeFile {
Version version;
uint32_t blockSize;
std::string fileName;
int64_t fileSize;
// Order by version, break ties with name
bool operator< (const RangeFile &rhs) const {
return version == rhs.version ? fileName < rhs.fileName : version < rhs.version;
}
};
struct KeyspaceSnapshotFile {
Version beginVersion;
Version endVersion;
std::string fileName;
int64_t totalSize;
// Order by beginVersion, break ties with endVersion
bool operator< (const KeyspaceSnapshotFile &rhs) const {
return beginVersion == rhs.beginVersion ? endVersion < rhs.endVersion : beginVersion < rhs.beginVersion;
}
};
struct BackupDescription {
std::string url;
std::vector<KeyspaceSnapshotFile> snapshots;
Optional<Version> minLogBegin;
Optional<Version> maxLogEnd;
Optional<Version> contiguousLogEnd;
Optional<Version> maxRestorableVersion;
Optional<Version> minRestorableVersion;
std::string extendedDetail; // Freeform container-specific info.
std::string toString() const;
};
struct RestorableFileSet {
Version targetVersion;
std::vector<LogFile> logs;
std::vector<RangeFile> ranges;
};
/* IBackupContainer is an interface to a set of backup data, which contains
* - backup metadata
* - log files
* - range files
* - keyspace snapshot files defining a complete non overlapping key space snapshot
*
* Files in a container are identified by a name. This can be any string, whatever
* makes sense for the underlying storage system.
*
* Reading files is done by file name. File names are discovered by getting a RestorableFileSet.
*
* For remote data stores that are filesystem-like, it's probably best to inherit BackupContainerFileSystem.
*/
class IBackupContainer {
public:
virtual void addref() = 0;
virtual void delref() = 0;
enum EMode { READONLY, WRITEONLY };
static std::vector<std::string> getURLFormats();
IBackupContainer() {}
virtual ~IBackupContainer() {}
// Create the container (if necessary)
// Create the container
virtual Future<Void> create() = 0;
// Open a named file in the container for reading (restore mode) or writing (backup mode)
virtual Future<Reference<IAsyncFile>> openFile(std::string name, EMode mode) = 0;
// Open a log file or range file for writing
virtual Future<Reference<IBackupFile>> writeLogFile(Version beginVersion, Version endVersion, int blockSize) = 0;
virtual Future<Reference<IBackupFile>> writeRangeFile(Version version, int blockSize) = 0;
// Returns whether or not a file exists in the container
virtual Future<bool> fileExists(std::string name) = 0;
// Write a KeyspaceSnapshotFile of range file names representing a full non overlapping
// snapshot of the key ranges this backup is targeting.
virtual Future<Void> writeKeyspaceSnapshotFile(std::vector<std::string> fileNames, int64_t totalBytes) = 0;
// Get a list of backup files in the container
virtual Future<std::vector<std::string>> listFiles() = 0;
// Open a file for read by name
virtual Future<Reference<IAsyncFile>> readFile(std::string name) = 0;
// Rename a file
virtual Future<Void> renameFile(std::string from, std::string to) = 0;
// Delete all data up to (but not including endVersion)
virtual Future<Void> expireData(Version endVersion) = 0;
// Delete entire container. During the process, if pNumDeleted is not null it will be
// updated with the count of deleted files so that progress can be seen.
virtual Future<Void> deleteContainer(int *pNumDeleted = nullptr) = 0;
// Uses the virtual methods to describe the backup contents
virtual Future<BackupDescription> describeBackup() = 0;
// Get exactly the files necessary to restore to targetVersion. Returns non-present if
// restore to given version is not possible.
virtual Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion) = 0;
// Get an IBackupContainer based on a container spec string
static Reference<IBackupContainer> openContainer(std::string url, std::string *error = nullptr);
};
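
For illustration, a restore-side caller now discovers files through the container rather than by interpreting raw file names; a minimal sketch using only the methods declared above (the function name and printed output are illustrative):

// Sketch: report the file set needed to restore to the latest restorable version.
ACTOR Future<Void> printRestoreSet(std::string url) {
	state Reference<IBackupContainer> bc = IBackupContainer::openContainer(url);
	state BackupDescription desc = wait(bc->describeBackup());
	printf("%s\n", desc.toString().c_str());
	if(!desc.maxRestorableVersion.present())
		return Void();                                    // nothing restorable yet
	Optional<RestorableFileSet> rs = wait(bc->getRestoreSet(desc.maxRestorableVersion.get()));
	if(rs.present()) {
		for(auto &rf : rs.get().ranges)
			printf("range %s (%lld bytes)\n", rf.fileName.c_str(), (long long)rf.fileSize);
		for(auto &lf : rs.get().logs)
			printf("log %s [%lld, %lld)\n", lf.fileName.c_str(), (long long)lf.beginVersion, (long long)lf.endVersion);
	}
	return Void();
}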
class BackupContainerBlobStore : public IBackupContainer, ReferenceCounted<BackupContainerBlobStore> {
public:
void addref() { return ReferenceCounted<BackupContainerBlobStore>::addref(); }
void delref() { return ReferenceCounted<BackupContainerBlobStore>::delref(); }
static const std::string META_BUCKET;
static std::string getURLFormat() { return BlobStoreEndpoint::getURLFormat(true); }
static Future<std::vector<std::string>> listBackupContainers(Reference<BlobStoreEndpoint> const &bs);
BackupContainerBlobStore(Reference<BlobStoreEndpoint> bstore, std::string name)
: m_bstore(bstore), m_bucketPrefix(name) {}
virtual ~BackupContainerBlobStore() { m_bucketCount.cancel(); }
// IBackupContainer methods
Future<Void> create();
Future<Reference<IAsyncFile>> openFile(std::string name, EMode mode);
Future<bool> fileExists(std::string name);
Future<Void> renameFile(std::string from, std::string to);
Future<std::vector<std::string>> listFiles();
Future<Void> listFilesStream(PromiseStream<BlobStoreEndpoint::ObjectInfo> results);
Future<Void> deleteContainer(int *pNumDeleted = NULL);
Future<std::string> containerInfo();
Future<int> getBucketCount();
std::string getBucketString(int num) { return format("%s_%d", m_bucketPrefix.c_str(), num); }
Future<std::string> getBucketForFile(std::string const &name);
Future<std::vector<std::string>> getBucketList();
Reference<BlobStoreEndpoint> m_bstore;
std::string m_bucketPrefix;
Future<int> m_bucketCount;
static Reference<IBackupContainer> openContainer(std::string url);
static std::vector<std::string> getURLFormats();
static std::vector<std::string> listContainers(std::string baseURL);
std::string getURL() const {
return URL;
}
static std::string lastOpenError;
private:
std::string URL;
};

File diff suppressed because it is too large

View File

@ -135,20 +135,32 @@ public:
}
Future<Optional<T>> get(Database cx, bool snapshot = false) const {
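// 'copy' is captured by value in the lambda below, so the transaction function holds its own copy of *this even if 'this' is destroyed before it runs.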
auto &copy = *this;
return runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
return get(tr, snapshot);
return copy.get(tr, snapshot);
});
}
Future<T> getD(Database cx, bool snapshot = false, T defaultValue = T()) const {
auto &copy = *this;
return runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
return copy.getD(tr, snapshot, defaultValue);
});
}
Future<T> getOrThrow(Database cx, bool snapshot = false, Error err = key_not_found()) const {
auto &copy = *this;
return runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
return getOrThrow(tr, snapshot, err);
return copy.getOrThrow(tr, snapshot, err);
});
}
@ -267,3 +279,53 @@ public:
Subspace space;
};
template <typename _ValueType>
class KeyBackedSet {
public:
KeyBackedSet(KeyRef key) : space(key) {}
typedef _ValueType ValueType;
typedef std::vector<ValueType> Values;
// If end is not present, the end of the set's subspace is used.
Future<Values> getRange(Reference<ReadYourWritesTransaction> tr, ValueType const &begin, Optional<ValueType> const &end, int limit, bool snapshot = false) const {
Subspace s = space; // 'this' could be invalid inside lambda
Key endKey = end.present() ? s.pack(Codec<ValueType>::pack(end.get())) : space.range().end;
return map(tr->getRange(KeyRangeRef(s.pack(Codec<ValueType>::pack(begin)), endKey), GetRangeLimits(limit), snapshot),
[s] (Standalone<RangeResultRef> const &kvs) -> Values {
Values results;
for(int i = 0; i < kvs.size(); ++i) {
results.push_back(Codec<ValueType>::unpack(s.unpack(kvs[i].key)));
}
return results;
});
}
Future<bool> exists(Reference<ReadYourWritesTransaction> tr, ValueType const &val, bool snapshot = false) const {
return map(tr->get(space.pack(Codec<ValueType>::pack(val)), snapshot), [](Optional<Value> const &val) -> bool {
return val.present();
});
}
// Returns the expectedSize of the set key
int insert(Reference<ReadYourWritesTransaction> tr, ValueType const &val) {
Key k = space.pack(Codec<ValueType>::pack(val));
tr->set(k, StringRef());
return k.expectedSize();
}
void erase(Reference<ReadYourWritesTransaction> tr, ValueType const &val) {
return tr->clear(space.pack(Codec<ValueType>::pack(val)));
}
void erase(Reference<ReadYourWritesTransaction> tr, ValueType const &begin, ValueType const &end) {
return tr->clear(KeyRangeRef(space.pack(Codec<ValueType>::pack(begin)), space.pack(Codec<ValueType>::pack(end))));
}
void clear(Reference<ReadYourWritesTransaction> tr) {
return tr->clear(space.range());
}
Subspace space;
};
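
As a usage sketch of the new KeyBackedSet<T> (the subspace key, helper names, and choice of Key as the value type are illustrative; a Codec<> specialization must exist for the chosen ValueType, as it does for the types BackupConfig uses):

// Sketch: a transactionally stored set of file-name Keys.
KeyBackedSet<Key> doneFiles(LiteralStringRef("\xff\x02/backupDoneFiles/"));

// Record a file as done; insert() returns the expectedSize of the written key.
void markFileDone(Reference<ReadYourWritesTransaction> tr, Key fileName) {
	doneFiles.insert(tr, fileName);
}

// Membership test; resolves to true if the packed key is present.
Future<bool> isFileDone(Reference<ReadYourWritesTransaction> tr, Key fileName) {
	return doneFiles.exists(tr, fileName);
}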

View File

@ -152,7 +152,6 @@ ClientKnobs::ClientKnobs(bool randomize) {
init( BLOBSTORE_READ_CACHE_BLOCKS_PER_FILE, 2 );
init( BLOBSTORE_MULTIPART_MAX_PART_SIZE, 20000000 );
init( BLOBSTORE_MULTIPART_MIN_PART_SIZE, 5242880 );
init( BLOBSTORE_BACKUP_BUCKETS, 100 );
// These are basically unlimited by default but can be used to reduce blob IO if needed
init( BLOBSTORE_REQUESTS_PER_SECOND, 200 );

View File

@ -158,7 +158,6 @@ public:
int BLOBSTORE_READ_CACHE_BLOCKS_PER_FILE;
int BLOBSTORE_MAX_SEND_BYTES_PER_SECOND;
int BLOBSTORE_MAX_RECV_BYTES_PER_SECOND;
int BLOBSTORE_BACKUP_BUCKETS;
int CONSISTENCY_CHECK_RATE_LIMIT;
int CONSISTENCY_CHECK_RATE_WINDOW;

View File

@ -21,8 +21,7 @@
#ifndef FDBCLIENT_STATUS_H
#define FDBCLIENT_STATUS_H
#include "json_spirit/json_spirit_writer_template.h"
#include "json_spirit/json_spirit_reader_template.h"
#include "../fdbrpc/JSONDoc.h"
struct StatusObject : json_spirit::mObject {
typedef json_spirit::mObject Map;
@ -71,291 +70,6 @@ static StatusObject makeMessage(const char *name, const char *description) {
return out;
}
// Removed JSONDoc definition suppressed here; it is identical to the new fdbrpc/JSONDoc.h shown below.
// Typedef to cover older code that was written when this class was only a reader and called StatusObjectReader
typedef JSONDoc StatusObjectReader;

View File

@ -220,6 +220,7 @@ public:
return task;
}
// Verify that the user-configured task verification key still has the user-specified value
ACTOR static Future<bool> taskVerify(Reference<TaskBucket> tb, Reference<ReadYourWritesTransaction> tr, Reference<Task> task) {
if (task->params.find(Task::reservedTaskParamValidKey) == task->params.end()) {
@ -503,6 +504,7 @@ public:
return false;
}
// Verify that the task's keys are still in the timeout space at the expected timeout prefix
ACTOR static Future<bool> isFinished(Reference<ReadYourWritesTransaction> tr, Reference<TaskBucket> taskBucket, Reference<Task> task) {
taskBucket->setOptions(tr);

View File

@ -24,7 +24,6 @@
#include "libb64/encode.h"
#include "sha1/SHA1.h"
#include "time.h"
#include "fdbclient/json_spirit/json_spirit_reader_template.h"
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp>
@ -66,12 +65,10 @@ BlobStoreEndpoint::BlobKnobs::BlobKnobs() {
read_cache_blocks_per_file = CLIENT_KNOBS->BLOBSTORE_READ_CACHE_BLOCKS_PER_FILE;
max_send_bytes_per_second = CLIENT_KNOBS->BLOBSTORE_MAX_RECV_BYTES_PER_SECOND;
max_recv_bytes_per_second = CLIENT_KNOBS->BLOBSTORE_MAX_SEND_BYTES_PER_SECOND;
buckets_to_span = CLIENT_KNOBS->BLOBSTORE_BACKUP_BUCKETS;
}
bool BlobStoreEndpoint::BlobKnobs::set(StringRef name, int value) {
#define TRY_PARAM(n, sn) if(name == LiteralStringRef(#n) || name == LiteralStringRef(#sn)) { n = value; return true; }
TRY_PARAM(buckets_to_span, bts);
TRY_PARAM(connect_tries, ct);
TRY_PARAM(connect_timeout, cto);
TRY_PARAM(max_connection_life, mcl);
@ -98,7 +95,6 @@ std::string BlobStoreEndpoint::BlobKnobs::getURLParameters() const {
static BlobKnobs defaults;
std::string r;
#define _CHECK_PARAM(n, sn) if(n != defaults. n) { r += format("%s%s=%d", r.empty() ? "" : "&", #sn, n); }
_CHECK_PARAM(buckets_to_span, bts);
_CHECK_PARAM(connect_tries, ct);
_CHECK_PARAM(connect_timeout, cto);
_CHECK_PARAM(max_connection_life, mcl);
@ -184,7 +180,7 @@ Reference<BlobStoreEndpoint> BlobStoreEndpoint::fromString(std::string const &ur
if(error != nullptr)
*error = err;
TraceEvent(SevWarnAlways, "BlobStoreEndpoint").detail("Description", err).detail("Format", getURLFormat()).detail("URL", url);
throw file_not_found();
throw backup_invalid_url();
}
}
@ -226,19 +222,23 @@ Future<Void> BlobStoreEndpoint::deleteObject(std::string const &bucket, std::str
}
ACTOR Future<Void> deleteBucket_impl(Reference<BlobStoreEndpoint> b, std::string bucket, int *pNumDeleted) {
state PromiseStream<BlobStoreEndpoint::ObjectInfo> resultStream;
state Future<Void> done = b->getBucketContentsStream(bucket, resultStream);
state PromiseStream<BlobStoreEndpoint::ListResult> resultStream;
state Future<Void> done = b->listBucketStream(bucket, resultStream);
state std::vector<Future<Void>> deleteFutures;
loop {
choose {
when(Void _ = wait(done)) {
break;
}
when(BlobStoreEndpoint::ObjectInfo info = waitNext(resultStream.getFuture())) {
if(pNumDeleted == nullptr)
deleteFutures.push_back(b->deleteObject(bucket, info.name));
else
deleteFutures.push_back(map(b->deleteObject(bucket, info.name), [this](Void) -> Void { ++*pNumDeleted; return Void(); }));
when(BlobStoreEndpoint::ListResult list = waitNext(resultStream.getFuture())) {
for(auto &object : list.objects) {
int *pNumDeletedCopy = pNumDeleted; // avoid capture of this
deleteFutures.push_back(map(b->deleteObject(bucket, object.name), [pNumDeletedCopy](Void) -> Void {
if(pNumDeletedCopy != nullptr)
++*pNumDeletedCopy;
return Void();
}));
}
}
}
}
@ -251,6 +251,18 @@ Future<Void> BlobStoreEndpoint::deleteBucket(std::string const &bucket, int *pNu
return deleteBucket_impl(Reference<BlobStoreEndpoint>::addRef(this), bucket, pNumDeleted);
}
ACTOR Future<Void> createBucket_impl(Reference<BlobStoreEndpoint> b, std::string bucket) {
std::string resource = std::string("/") + bucket;
HTTP::Headers headers;
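// A 409 (Conflict, e.g. the bucket already exists) is accepted as success so creating an existing bucket is not an error.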
Reference<HTTP::Response> r = wait(b->doRequest("PUT", resource, headers, NULL, 0, {200, 409}));
return Void();
}
Future<Void> BlobStoreEndpoint::createBucket(std::string const &bucket) {
return createBucket_impl(Reference<BlobStoreEndpoint>::addRef(this), bucket);
}
ACTOR Future<int64_t> objectSize_impl(Reference<BlobStoreEndpoint> b, std::string bucket, std::string object) {
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
@ -429,9 +441,16 @@ Future<Reference<HTTP::Response>> BlobStoreEndpoint::doRequest(std::string const
return doRequest_impl(Reference<BlobStoreEndpoint>::addRef(this), verb, resource, headers, pContent, contentLen, successCodes);
}
ACTOR Future<Void> getBucketContentsStream_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, PromiseStream<BlobStoreEndpoint::ObjectInfo> results) {
ACTOR Future<Void> listBucketStream_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, PromiseStream<BlobStoreEndpoint::ListResult> results, Optional<std::string> prefix, Optional<char> delimiter) {
// Request 1000 keys at a time, the maximum allowed
state std::string resource = std::string("/") + bucket + "/?max-keys=1000&marker=";
state std::string resource = "/";
resource.append(bucket);
resource.append("/?max-keys=1000");
if(prefix.present())
resource.append("&prefix=").append(HTTP::urlEncode(prefix.get()));
if(delimiter.present())
resource.append("&delimiter=").append(HTTP::urlEncode(std::string(delimiter.get(), 1)));
resource.append("&marker=");
state std::string lastFile;
state bool more = true;
@ -440,32 +459,31 @@ ACTOR Future<Void> getBucketContentsStream_impl(Reference<BlobStoreEndpoint> bst
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", resource + HTTP::urlEncode(lastFile), headers, NULL, 0, {200}));
try {
BlobStoreEndpoint::ListResult result;
// Parse the json assuming it is valid and contains the right stuff. If any exceptions are thrown, throw http_bad_response
json_spirit::Value json;
json_spirit::mValue json;
json_spirit::read_string(r->content, json);
for(auto &i : json.get_obj()) {
if(i.name_ == "truncated") {
more = i.value_.get_bool();
}
else if(i.name_ == "results") {
BlobStoreEndpoint::ObjectInfo info;
info.bucket = bucket;
for(auto &o : i.value_.get_array()) {
info.size = -1;
info.name.clear();
for(auto &f : o.get_obj()) {
if(f.name_ == "size")
info.size = f.value_.get_int();
else if(f.name_ == "key")
info.name = f.value_.get_str();
}
if(info.size >= 0 && !info.name.empty()) {
lastFile = info.name;
results.send(std::move(info));
JSONDoc doc(json);
doc.tryGet("truncated", more);
if(doc.has("results")) {
for(auto &jsonObject : doc.at("results").get_array()) {
JSONDoc objectDoc(jsonObject);
BlobStoreEndpoint::ObjectInfo object;
objectDoc.get("size", object.size);
objectDoc.get("key", object.name);
result.objects.push_back(std::move(object));
}
}
if(doc.has("CommonPrefixes")) {
for(auto &jsonObject : doc.at("CommonPrefixes").get_array()) {
JSONDoc objectDoc(jsonObject);
std::string prefix;
objectDoc.get("Prefix", prefix);
result.commonPrefixes.push_back(std::move(prefix));
}
}
results.send(result);
} catch(Error &e) {
throw http_bad_response();
}
@ -474,29 +492,30 @@ ACTOR Future<Void> getBucketContentsStream_impl(Reference<BlobStoreEndpoint> bst
return Void();
}
Future<Void> BlobStoreEndpoint::getBucketContentsStream(std::string const &bucket, PromiseStream<BlobStoreEndpoint::ObjectInfo> results) {
return getBucketContentsStream_impl(Reference<BlobStoreEndpoint>::addRef(this), bucket, results);
Future<Void> BlobStoreEndpoint::listBucketStream(std::string const &bucket, PromiseStream<ListResult> results, Optional<std::string> prefix, Optional<char> delimiter) {
return listBucketStream_impl(Reference<BlobStoreEndpoint>::addRef(this), bucket, results, prefix, delimiter);
}
ACTOR Future<BlobStoreEndpoint::BucketContentsT> getBucketContents_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket) {
state BlobStoreEndpoint::BucketContentsT results;
state PromiseStream<BlobStoreEndpoint::ObjectInfo> resultStream;
state Future<Void> done = bstore->getBucketContentsStream(bucket, resultStream);
ACTOR Future<BlobStoreEndpoint::ListResult> listBucket_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, Optional<std::string> prefix, Optional<char> delimiter) {
state BlobStoreEndpoint::ListResult results;
state PromiseStream<BlobStoreEndpoint::ListResult> resultStream;
state Future<Void> done = bstore->listBucketStream(bucket, resultStream, prefix, delimiter);
loop {
choose {
when(Void _ = wait(done)) {
break;
}
when(BlobStoreEndpoint::ObjectInfo info = waitNext(resultStream.getFuture())) {
results.push_back(info);
when(BlobStoreEndpoint::ListResult info = waitNext(resultStream.getFuture())) {
results.commonPrefixes.insert(results.commonPrefixes.end(), info.commonPrefixes.begin(), info.commonPrefixes.end());
results.objects.insert(results.objects.end(), info.objects.begin(), info.objects.end());
}
}
}
return results;
}
Future<BlobStoreEndpoint::BucketContentsT> BlobStoreEndpoint::getBucketContents(std::string const &bucket) {
return getBucketContents_impl(Reference<BlobStoreEndpoint>::addRef(this), bucket);
Future<BlobStoreEndpoint::ListResult> BlobStoreEndpoint::listBucket(std::string const &bucket, Optional<std::string> prefix, Optional<char> delimiter) {
return listBucket_impl(Reference<BlobStoreEndpoint>::addRef(this), bucket, prefix, delimiter);
}
std::string BlobStoreEndpoint::hmac_sha1(std::string const &msg) {
@ -639,7 +658,9 @@ ACTOR Future<int> readObject_impl(Reference<BlobStoreEndpoint> bstore, std::stri
std::string resource = std::string("/") + bucket + "/" + object;
HTTP::Headers headers;
headers["Range"] = format("bytes=%lld-%lld", offset, offset + length - 1);
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", resource, headers, NULL, 0, {200, 206}));
Reference<HTTP::Response> r = wait(bstore->doRequest("GET", resource, headers, NULL, 0, {200, 206, 404}));
if(r->code == 404)
throw file_not_found();
if(r->contentLen != r->content.size()) // Double check that this wasn't a header-only response, probably unnecessary
throw io_error();
// Copy the output bytes, server could have sent more or less bytes than requested so copy at most length bytes

View File

@ -26,7 +26,7 @@
#include "fdbclient/Knobs.h"
#include "IRateControl.h"
#include "HTTP.h"
#include "fdbclient/json_spirit/json_spirit_writer_template.h"
#include "JSONDoc.h"
// Representation of all the things you need to connect to a blob store instance with some credentials.
// Reference counted because a very large number of them could be needed.
@ -63,8 +63,7 @@ public:
read_ahead_blocks,
read_cache_blocks_per_file,
max_send_bytes_per_second,
max_recv_bytes_per_second,
buckets_to_span;
max_recv_bytes_per_second;
bool set(StringRef name, int value);
std::string getURLParameters() const;
static std::vector<std::string> getKnobDescriptions() {
@ -85,8 +84,7 @@ public:
"read_ahead_blocks (or rab) Number of blocks to read ahead of requested offset.",
"read_cache_blocks_per_file (or rcb) Size of the read cache for a file in blocks.",
"max_send_bytes_per_second (or sbps) Max send bytes per second for all requests combined.",
"max_recv_bytes_per_second (or rbps) Max receive bytes per second for all requests combined (NOT YET USED).",
"buckets_to_span (or bts) Number of buckets that a new backup should distribute over."
"max_recv_bytes_per_second (or rbps) Max receive bytes per second for all requests combined (NOT YET USED)."
};
}
};
@ -148,18 +146,22 @@ public:
// Every blob store interaction should ultimately go through this function
Future<Reference<HTTP::Response>> doRequest(std::string const &verb, std::string const &resource, const HTTP::Headers &headers, UnsentPacketQueue *pContent, int contentLen, std::set<unsigned int> successCodes);
struct ObjectInfo {
std::string bucket;
std::string name;
int64_t size;
};
struct ListResult {
std::vector<std::string> commonPrefixes;
std::vector<ObjectInfo> objects;
};
// Get bucket contents via a stream, since listing large buckets will take many serial blob requests
Future<Void> getBucketContentsStream(std::string const &bucket, PromiseStream<ObjectInfo> results);
Future<Void> listBucketStream(std::string const &bucket, PromiseStream<ListResult> results, Optional<std::string> prefix = {}, Optional<char> delimiter = {});
// Get a list of the files in a bucket
typedef std::vector<ObjectInfo> BucketContentsT;
Future<BucketContentsT> getBucketContents(std::string const &bucket);
Future<ListResult> listBucket(std::string const &bucket, Optional<std::string> prefix = {}, Optional<char> delimiter = {});
// Check if an object exists in a bucket
Future<bool> objectExists(std::string const &bucket, std::string const &object);
@ -179,6 +181,9 @@ public:
// a deletion of an object completes.
Future<Void> deleteBucket(std::string const &bucket, int *pNumDeleted = NULL);
// Create a bucket if it does not already exist.
Future<Void> createBucket(std::string const &bucket);
// Useful methods for working with tiny files
Future<std::string> readEntireFile(std::string const &bucket, std::string const &object);
Future<Void> writeEntireFile(std::string const &bucket, std::string const &object, std::string const &content);
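
As an illustration of the new prefix/delimiter options, one level of a pseudo-directory hierarchy can be listed by combining a prefix with a '/' delimiter (bucket and prefix names below are illustrative):

// Sketch: list the immediate "subdirectories" and objects under data/ in a bucket.
ACTOR Future<Void> listDataFolders(Reference<BlobStoreEndpoint> bstore, std::string bucket) {
	BlobStoreEndpoint::ListResult result = wait(bstore->listBucket(bucket, std::string("data/"), '/'));
	for(auto &prefix : result.commonPrefixes)
		printf("prefix: %s\n", prefix.c_str());
	for(auto &object : result.objects)
		printf("object: %s (%lld bytes)\n", object.name.c_str(), (long long)object.size);
	return Void();
}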

fdbrpc/JSONDoc.h (new file, +310 lines)
View File

@ -0,0 +1,310 @@
/*
* JSONDoc.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "json_spirit/json_spirit_writer_template.h"
#include "json_spirit/json_spirit_reader_template.h"
// JSONDoc is a convenient reader/writer class for manipulating JSON documents using "paths".
// Access is done using a "path", which is a string of dot-separated
// substrings representing successively deeper keys found in nested
// JSON objects within the top level object
//
// Most methods are read-only with respect to the source JSON object.
// The only modifying methods are create(), put(), subDoc(), and mergeInto()
//
// JSONDoc maintains some state which is the JSON value that was found during the most recent
// *successful* path lookup.
//
// Examples:
// JSONDoc r(some_obj);
//
// // See if JSON doc path a.b.c exists
// bool exists = r.has("a.b.c");
//
// // See if JSON doc path a.b.c exists, if it does then assign value to x. Throws if path exists but T is not compatible.
// T x;
// bool exists = r.has("a.b.c", x);
//
// // This way you can chain things like this:
// bool is_two = r.has("a.b.c", x) && x == 2;
//
// // Alternatively, you can avoid the temp var by making use of the last() method which returns a reference
// // to the JSON value at the last successfully found path that has() has seen.
// bool is_int = r.has("a.b.c") && r.last().type == json_spirit::int_type;
// bool is_two = r.has("a.b.c") && r.last().get_int() == 2;
//
// // The familiar at() method also exists but now supports the same path concept.
// // It will throw in the same circumstances as the original method
// int x = r.at("a.b.c").get_int();
//
// // If you wish to access an element with the dot character within its name (e.g., "hostname.example.com"),
// // you can do so by setting the "split" flag to false in either the "has" or "get" methods. The example
// // below will look for the key "hostname.example.com" as a subkey of the path "a.b.c" (or, more
// // precisely, it will look to see if r.has("a").has("b").has("c").has("hostname.example.com", false)).
// bool exists = r.has("a.b.c").has("hostname.example.com", false);
//
// // And the familiar operator[] interface exists as well, however only as a synonym for at()
// // because this class is only for reading. Using operator [] will not auto-create null things.
// // The following would throw if a.b.c did not exist, or if it was not an int.
// int x = r["a.b.c"].get_int();
struct JSONDoc {
JSONDoc() : pObj(NULL) {}
// Construction from const json_spirit::mObject, trivial and will never throw.
// Resulting JSONDoc will not allow modifications.
JSONDoc(const json_spirit::mObject &o) : pObj(&o), wpObj(NULL) {}
// Construction from json_spirit::mObject. Allows modifications.
JSONDoc(json_spirit::mObject &o) : pObj(&o), wpObj(&o) {}
// Construction from const json_spirit::mValue (which is a Variant type) which will try to
// convert it to an mObject. This will throw if that fails, just as it would
// if the caller called get_obj() itself and used the previous constructor instead.
JSONDoc(const json_spirit::mValue &v) : pObj(&v.get_obj()), wpObj(NULL) {}
// Construction from non-const json_spirit::mValue - will convert the mValue to
// an object if it isn't already and then attach to it.
JSONDoc(json_spirit::mValue &v) {
if(v.type() != json_spirit::obj_type)
v = json_spirit::mObject();
wpObj = &v.get_obj();
pObj = wpObj;
}
// Returns whether or not a "path" exists.
// Returns true if all elements along path exist
// Returns false if any elements along the path are MISSING
// Will throw if a non-terminating path element exists BUT is not a JSON Object.
// If the "split" flag is set to "false", then this skips the splitting of a
// path into on the "dot" character.
// When a path is found, pLast is updated.
bool has(std::string path, bool split=true) {
if (pObj == NULL)
return false;
if (path.empty())
return false;
size_t start = 0;
const json_spirit::mValue *curVal = NULL;
while (start < path.size())
{
// If a path segment is found then curVal must be an object
size_t dot;
if (split) {
dot = path.find_first_of('.', start);
if (dot == std::string::npos)
dot = path.size();
} else {
dot = path.size();
}
std::string key = path.substr(start, dot - start);
// Get pointer to the current Object that the key has to be in
// This will throw if the value is not an Object
const json_spirit::mObject *curObj = curVal ? &curVal->get_obj() : pObj;
// Make sure key exists, if not then return false
if (!curObj->count(key))
return false;
// Advance curVal
curVal = &curObj->at(key);
// Advance start position in path
start = dot + 1;
}
pLast = curVal;
return true;
}
// Creates the given path (forcing Objects to exist along its depth, replacing whatever else might have been there)
// and returns a reference to the Value at that location.
json_spirit::mValue & create(std::string path, bool split=true) {
if (wpObj == NULL || path.empty())
throw std::runtime_error("JSON Object not writable or bad JSON path");
size_t start = 0;
json_spirit::mValue *curVal = nullptr;
while (start < path.size())
{
// Get next path segment name
size_t dot;
if (split) {
dot = path.find_first_of('.', start);
if (dot == std::string::npos)
dot = path.size();
} else {
dot = path.size();
}
std::string key = path.substr(start, dot - start);
if(key.empty())
throw std::runtime_error("invalid JSON path");
// Get/create pointer to the current Object that the key has to be in
// If curVal is defined then force it to be an Object
json_spirit::mObject *curObj;
if(curVal != nullptr) {
if(curVal->type() != json_spirit::obj_type)
*curVal = json_spirit::mObject();
curObj = &curVal->get_obj();
}
else // Otherwise start with the object *this is writing to
curObj = wpObj;
// Make sure key exists; if not, create it with a null value
if (!curObj->count(key))
(*curObj)[key] = json_spirit::mValue();
// Advance curVal
curVal = &((*curObj)[key]);
// Advance start position in path
start = dot + 1;
}
return *curVal;
}
// Creates the path given, puts a value at it, and returns a reference to the value
template<typename T>
T & put(std::string path, const T & value, bool split=true) {
json_spirit::mValue &v = create(path, split);
v = value;
return v.get_value<T>();
}
// Ensures that an Object exists at path and returns a JSONDoc that writes to it.
JSONDoc subDoc(std::string path, bool split=true) {
json_spirit::mValue &v = create(path, split);
if(v.type() != json_spirit::obj_type)
v = json_spirit::mObject();
return JSONDoc(v.get_obj());
}
// Apply a merge operation to two values. Works for int, double, and string
template <typename T>
static json_spirit::mObject mergeOperator(const std::string &op, const json_spirit::mObject &op_a, const json_spirit::mObject &op_b, T const &a, T const &b) {
if(op == "$max")
return {{op, std::max<T>(a, b)}};
if(op == "$min")
return {{op, std::min<T>(a, b)}};
if(op == "$sum")
return {{op, a + b}};
throw std::exception();
}
// This is just a convenience function to make calling mergeOperator look cleaner
template <typename T>
static json_spirit::mObject mergeOperatorWrapper(const std::string &op, const json_spirit::mObject &op_a, const json_spirit::mObject &op_b, const json_spirit::mValue &a, const json_spirit::mValue &b) {
return mergeOperator<T>(op, op_a, op_b, a.get_value<T>(), b.get_value<T>());
}
static inline std::string getOperator(const json_spirit::mObject &obj) {
for(auto &k : obj)
if(!k.first.empty() && k.first[0] == '$')
return k.first;
return std::string();
}
// Merge src into dest, applying merge operators
static void mergeInto(json_spirit::mObject &dst, const json_spirit::mObject &src);
static void mergeValueInto(json_spirit::mValue &d, const json_spirit::mValue &s);
// Remove any merge operators that never met any mates.
static void cleanOps(json_spirit::mObject &obj);
void cleanOps() {
if(wpObj == nullptr)
throw std::runtime_error("JSON Object not writable");
return cleanOps(*wpObj);
}
void absorb(const JSONDoc &doc) {
if(wpObj == nullptr)
throw std::runtime_error("JSON Object not writable");
if(doc.pObj == nullptr)
throw std::runtime_error("JSON Object not readable");
mergeInto(*wpObj, *doc.pObj);
}
// Returns whether or not a "path" exists.
// Returns true if all elements along path exist
// Returns false if any elements along the path are MISSING
// Sets out to the value of the thing that path refers to
// Will throw if a non-terminating path element exists BUT is not a JSON Object.
// Will throw if all elements along path exists but T is an incompatible type
template <typename T> bool get(const std::string path, T &out, bool split=true) {
bool r = has(path, split);
if (r)
out = pLast->get_value<T>();
return r;
}
// For convenience, wraps get() in a try/catch and returns false UNLESS the path existed and was a compatible type.
template <typename T> bool tryGet(const std::string path, T &out, bool split=true) {
try { return get(path, out, split); } catch(...) {}
return false;
}
const json_spirit::mValue & at(const std::string path, bool split=true) {
if (has(path, split))
return last();
throw std::runtime_error("JSON path doesn't exist");
}
const json_spirit::mValue & operator[](const std::string path) {
return at(path);
}
const json_spirit::mValue & last() const { return *pLast; }
bool valid() const { return pObj != NULL; }
const json_spirit::mObject & obj() {
// This dummy object is necessary to make working with obj() easier when this does not currently
// point to a valid mObject. valid() can be called to explicitly check for this scenario, but
// calling obj() at least will not seg fault and instead return a const reference to an empty mObject.
// This is very useful when iterating using obj() to access the underlying mObject.
static const json_spirit::mObject dummy;
return pObj ? *pObj : dummy;
}
// Return reference to writeable underlying mObject but only if *this was initialized with a writeable value or object
json_spirit::mObject & wobj() {
ASSERT(wpObj != nullptr);
return *wpObj;
}
// This is the version used to represent 'now' for use by the $expires operator.
// By default, nothing will expire and it is up to the user of JSONDoc to update this value if
// it is intended to be used.
// This is slightly hackish but otherwise the JSON merge functions would require a Transaction.
static uint64_t expires_reference_version;
private:
const json_spirit::mObject *pObj;
// Writeable pointer to the same object. Will be NULL if initialized from a const object.
json_spirit::mObject *wpObj;
const json_spirit::mValue *pLast;
};

View File

@ -162,36 +162,42 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
state int resultWait = wait(backupAgent->waitBackup(cx, backupTag.tagName, false));
UidAndAbortedFlagT uidFlag = wait(backupTag.getOrThrow(cx));
state UID logUid = uidFlag.first;
state std::string lastBackupContainer = wait(BackupConfig(logUid).backupContainer().getOrThrow(cx, false, backup_unneeded()));
state Reference<IBackupContainer> lastBackupContainer = wait(BackupConfig(logUid).backupContainer().getD(cx));
state std::string restorableFile = joinPath(lastBackupContainer, "restorable");
TraceEvent("BARW_lastBackupContainer", randomID).detail("backupTag", printable(tag)).detail("lastBackupContainer", lastBackupContainer)
.detail("logUid", logUid).detail("waitStatus", resultWait).detail("restorable", restorableFile);
state bool restorable = false;
if(lastBackupContainer) {
BackupDescription desc = wait(lastBackupContainer->describeBackup());
restorable = desc.maxRestorableVersion.present();
}
TraceEvent("BARW_lastBackupContainer", randomID)
.detail("backupTag", printable(tag))
.detail("lastBackupContainer", lastBackupContainer ? lastBackupContainer->getURL() : "")
.detail("logUid", logUid).detail("waitStatus", resultWait).detail("restorable", restorable);
// Do not check the backup, if aborted
if (resultWait == BackupAgentBase::STATE_ABORTED) {
}
// Ensure that a backup container was found
else if (lastBackupContainer.empty()) {
else if (!lastBackupContainer) {
TraceEvent("BARW_missingBackupContainer", randomID).detail("logUid", logUid).detail("backupTag", printable(tag)).detail("waitStatus", resultWait);
printf("BackupCorrectnessMissingBackupContainer tag: %s status: %d\n", printable(tag).c_str(), resultWait);
}
// Ensure that the restorable file is present
// Check that backup is restorable
else {
bool rfExists = wait(IBackupContainer::openContainer(lastBackupContainer)->fileExists(restorableFile));
if(!rfExists) {
TraceEvent("BARW_missingBackupRestoreFile", randomID).detail("logUid", logUid).detail("backupTag", printable(tag))
.detail("backupFolder", lastBackupContainer).detail("restorable", restorableFile).detail("waitStatus", resultWait);
printf("BackupCorrectnessMissingRestorable: %s tag: %s\n", restorableFile.c_str(), printable(tag).c_str());
if(!restorable) {
TraceEvent("BARW_notRestorable", randomID).detail("logUid", logUid).detail("backupTag", printable(tag))
.detail("backupFolder", lastBackupContainer->getURL()).detail("waitStatus", resultWait);
printf("BackupCorrectnessNotRestorable: tag: %s\n", printable(tag).c_str());
}
}
// Abort the backup, if not the first backup because the second backup may have aborted the backup by now
if (startDelay) {
TraceEvent("BARW_doBackupAbortBackup2", randomID).detail("tag", printable(tag))
.detail("waitStatus", resultWait).detail("lastBackupContainer", lastBackupContainer).detail("restorable", restorableFile);
.detail("waitStatus", resultWait)
.detail("lastBackupContainer", lastBackupContainer ? lastBackupContainer->getURL() : "")
.detail("restorable", restorable);
Void _ = wait(backupAgent->abortBackup(cx, tag.toString()));
}
else {
@ -309,7 +315,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
state KeyBackedTag keyBackedTag = makeBackupTag(self->backupTag.toString());
UidAndAbortedFlagT uidFlag = wait(keyBackedTag.getOrThrow(cx));
state UID logUid = uidFlag.first;
state std::string lastBackupContainer = wait(BackupConfig(logUid).backupContainer().getOrThrow(cx));
state Reference<IBackupContainer> lastBackupContainer = wait(BackupConfig(logUid).backupContainer().getD(cx));
// Occasionally start yet another backup that might still be running when we restore
if (!self->locked && BUGGIFY) {
@ -327,9 +333,9 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
TEST(!startRestore.isReady()); //Restore starts at specified time
Void _ = wait(startRestore);
if ((lastBackupContainer.size()) && (self->performRestore)) {
if (lastBackupContainer && self->performRestore) {
if (g_random->random01() < 0.5) {
Void _ = wait(attemptDirtyRestore(self, cx, &backupAgent, StringRef(lastBackupContainer), randomID));
Void _ = wait(attemptDirtyRestore(self, cx, &backupAgent, StringRef(lastBackupContainer->getURL()), randomID));
}
Void _ = wait(runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) -> Future<Void> {
for (auto &kvrange : self->backupRanges)
@ -338,7 +344,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
}));
// restore database
TraceEvent("BARW_restore", randomID).detail("lastBackupContainer", lastBackupContainer).detail("restoreAfter", self->restoreAfter).detail("backupTag", printable(self->backupTag));
TraceEvent("BARW_restore", randomID).detail("lastBackupContainer", lastBackupContainer->getURL()).detail("restoreAfter", self->restoreAfter).detail("backupTag", printable(self->backupTag));
state std::vector<Future<Version>> restores;
state std::vector<Standalone<StringRef>> restoreTags;
@ -348,7 +354,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
auto range = self->backupRanges[restoreIndex];
Standalone<StringRef> restoreTag(self->backupTag.toString() + "_" + std::to_string(restoreIndex));
restoreTags.push_back(restoreTag);
restores.push_back(backupAgent.restore(cx, restoreTag, KeyRef(lastBackupContainer), true, -1, true, range, Key(), Key(), self->locked));
restores.push_back(backupAgent.restore(cx, restoreTag, KeyRef(lastBackupContainer->getURL()), true, -1, true, range, Key(), Key(), self->locked));
}
// Sometimes kill and restart the restore
@ -363,7 +369,7 @@ struct BackupAndRestoreCorrectnessWorkload : TestWorkload {
tr->clear(self->backupRanges[restoreIndex]);
return Void();
}));
restores[restoreIndex] = backupAgent.restore(cx, restoreTags[restoreIndex], KeyRef(lastBackupContainer), true, -1, true, self->backupRanges[restoreIndex], Key(), Key(), self->locked);
restores[restoreIndex] = backupAgent.restore(cx, restoreTags[restoreIndex], KeyRef(lastBackupContainer->getURL()), true, -1, true, self->backupRanges[restoreIndex], Key(), Key(), self->locked);
}
}
}

View File

@ -1887,6 +1887,21 @@ std::vector<std::string> listFiles( std::string const& directory, std::string co
std::vector<std::string> listDirectories( std::string const& directory ) {
return findFiles( directory, "", &acceptDirectory );
}
void findFilesRecursively(std::string path, std::vector<std::string> &out) {
// Add files to output, prefixing path
std::vector<std::string> files = platform::listFiles(path);
for(auto const &f : files)
out.push_back(joinPath(path, f));
// Recurse for directories
std::vector<std::string> directories = platform::listDirectories(path);
for(auto const &dir : directories) {
if(dir != "." && dir != "..")
findFilesRecursively(joinPath(path, dir), out);
}
};
}; // namespace platform
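
A quick usage sketch of the new helper (the directory path is illustrative):

// Sketch: collect every file under a local backup directory, paths prefixed with the directory.
void printLocalBackupFiles() {
	std::vector<std::string> files;
	platform::findFilesRecursively("/var/backups/fdb", files);
	for(auto &f : files)
		printf("%s\n", f.c_str());
}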

View File

@ -330,6 +330,8 @@ std::vector<std::string> listFiles( std::string const& directory, std::string co
// returns directory names relative to directory
std::vector<std::string> listDirectories( std::string const& directory );
void findFilesRecursively(std::string path, std::vector<std::string> &out);
// Tag the given file as "temporary", i.e. not really needing commits to disk
void makeTemporary( const char* filename );

View File

@ -169,12 +169,14 @@ ERROR( restore_error, 2301, "Restore error")
ERROR( backup_duplicate, 2311, "Backup duplicate request")
ERROR( backup_unneeded, 2312, "Backup unneeded request")
ERROR( backup_bad_block_size, 2313, "Backup file block size too small")
ERROR( backup_invalid_url, 2314, "Backup Container URL invalid")
ERROR( backup_invalid_info, 2315, "Backup Container info invalid")
ERROR( restore_invalid_version, 2361, "Invalid restore version")
ERROR( restore_corrupted_data, 2362, "Corrupted backup data")
ERROR( restore_missing_data, 2363, "Missing backup data")
ERROR( restore_duplicate_tag, 2364, "Restore duplicate request")
ERROR( restore_unknown_tag, 2365, "Restore tag does not exist")
ERROR( restore_unknown_file_type, 2366, "Unknown backup file type")
ERROR( restore_unknown_file_type, 2366, "Unknown backup/restore file type")
ERROR( restore_unsupported_file_version, 2367, "Unsupported backup file version")
ERROR( restore_bad_read, 2368, "Unexpected number of bytes read")
ERROR( restore_corrupted_data_padding, 2369, "Backup file has unexpected padding bytes")