2017-05-26 04:48:44 +08:00
|
|
|
/*
|
|
|
|
* BackupContainer.h
|
|
|
|
*
|
|
|
|
* This source file is part of the FoundationDB open source project
|
|
|
|
*
|
|
|
|
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
2018-02-22 02:25:11 +08:00
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
2018-02-22 02:25:11 +08:00
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
2018-02-22 02:25:11 +08:00
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2020-10-22 12:31:15 +08:00
|
|
|
#ifndef FDBCLIENT_BACKUP_CONTAINER_H
|
|
|
|
#define FDBCLIENT_BACKUP_CONTAINER_H
|
2017-05-26 04:48:44 +08:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include "flow/flow.h"
|
|
|
|
#include "fdbrpc/IAsyncFile.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
#include "fdbclient/FDBTypes.h"
|
2019-02-18 07:41:16 +08:00
|
|
|
#include "fdbclient/NativeAPI.actor.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
#include "fdbclient/ReadYourWrites.h"
|
2017-05-26 04:48:44 +08:00
|
|
|
#include <vector>
|
|
|
|
|
2019-05-10 11:55:44 +08:00
|
|
|
class ReadYourWritesTransaction;
|
|
|
|
|
2018-01-23 15:57:01 +08:00
|
|
|
Future<Optional<int64_t>> timeKeeperEpochsFromVersion(Version const &v, Reference<ReadYourWritesTransaction> const &tr);
|
2018-01-23 16:19:51 +08:00
|
|
|
Future<Version> timeKeeperVersionFromDatetime(std::string const &datetime, Database const &db);
|
2018-01-23 15:57:01 +08:00
|
|
|
|
2017-11-15 15:33:17 +08:00
|
|
|
// Append-only file interface for writing backup data
|
2017-11-27 14:02:14 +08:00
|
|
|
// Once finish() is called the file cannot be further written to.
|
|
|
|
// Backup containers should not attempt to use files for which finish was not called or did not complete.
|
2017-11-16 05:33:09 +08:00
|
|
|
// TODO: Move the log file and range file format encoding/decoding stuff to this file and behind interfaces.
|
2017-11-15 15:33:17 +08:00
|
|
|
class IBackupFile {
|
2017-05-26 04:48:44 +08:00
|
|
|
public:
|
2021-01-16 02:04:09 +08:00
|
|
|
<<<<<<< HEAD
|
2020-10-22 13:19:15 +08:00
|
|
|
IBackupFile(const std::string& fileName) : m_fileName(fileName), m_offset(0) {}
|
2021-01-16 02:04:09 +08:00
|
|
|
=======
|
2021-01-09 23:03:47 +08:00
|
|
|
IBackupFile(std::string fileName) : m_fileName(fileName) {}
|
2021-01-16 02:04:09 +08:00
|
|
|
>>>>>>> release-6.3
|
2017-11-15 15:33:17 +08:00
|
|
|
virtual ~IBackupFile() {}
|
|
|
|
// Backup files are append-only and cannot have more than 1 append outstanding at once.
|
2017-11-16 05:33:09 +08:00
|
|
|
virtual Future<Void> append(const void *data, int len) = 0;
|
2017-11-15 15:33:17 +08:00
|
|
|
virtual Future<Void> finish() = 0;
|
|
|
|
inline std::string getFileName() const {
|
|
|
|
return m_fileName;
|
|
|
|
}
|
2021-01-09 23:03:47 +08:00
|
|
|
virtual int64_t size() const = 0;
|
2017-05-26 04:48:44 +08:00
|
|
|
virtual void addref() = 0;
|
|
|
|
virtual void delref() = 0;
|
2017-11-16 05:33:09 +08:00
|
|
|
|
2018-01-17 20:09:43 +08:00
|
|
|
Future<Void> appendStringRefWithLen(Standalone<StringRef> s);
|
2017-11-15 15:33:17 +08:00
|
|
|
protected:
|
|
|
|
std::string m_fileName;
|
|
|
|
};
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2017-11-15 15:33:17 +08:00
|
|
|
// Structures for various backup components
|
2020-02-15 03:27:02 +08:00
|
|
|
|
|
|
|
// File-format version tags. They are compile-time constants, so they are declared
// constexpr; internal linkage (static) is kept exactly as before.

// Mutation log version written by old FileBackupAgent
static constexpr uint32_t BACKUP_AGENT_MLOG_VERSION = 2001;

// Mutation log version written by BackupWorker
static constexpr uint32_t PARTITIONED_MLOG_VERSION = 4110;

// Snapshot file version written by FileBackupAgent
static constexpr uint32_t BACKUP_AGENT_SNAPSHOT_FILE_VERSION = 1001;
|
|
|
|
|
2017-11-15 15:33:17 +08:00
|
|
|
struct LogFile {
|
|
|
|
Version beginVersion;
|
|
|
|
Version endVersion;
|
|
|
|
uint32_t blockSize;
|
|
|
|
std::string fileName;
|
|
|
|
int64_t fileSize;
|
2019-10-26 07:17:18 +08:00
|
|
|
int tagId = -1; // Log router tag. Non-negative for new backup format.
|
2020-02-21 08:28:27 +08:00
|
|
|
int totalTags = -1; // Total number of log router tags.
|
2017-11-15 15:33:17 +08:00
|
|
|
|
|
|
|
// Order by beginVersion, break ties with endVersion
|
|
|
|
bool operator< (const LogFile &rhs) const {
|
|
|
|
return beginVersion == rhs.beginVersion ? endVersion < rhs.endVersion : beginVersion < rhs.beginVersion;
|
|
|
|
}
|
2018-11-30 02:31:47 +08:00
|
|
|
|
2020-03-05 02:52:51 +08:00
|
|
|
// Returns if this log file contains a subset of content of the given file
|
|
|
|
// by comparing version range and tag ID.
|
|
|
|
bool isSubset(const LogFile& rhs) const {
|
2020-03-13 06:30:07 +08:00
|
|
|
return beginVersion >= rhs.beginVersion && endVersion <= rhs.endVersion && tagId == rhs.tagId;
|
2019-11-21 12:32:15 +08:00
|
|
|
}
|
|
|
|
|
2020-03-26 13:53:22 +08:00
|
|
|
bool isPartitionedLog() const {
|
|
|
|
return tagId >= 0 && tagId < totalTags;
|
|
|
|
}
|
|
|
|
|
2018-11-30 02:31:47 +08:00
|
|
|
std::string toString() const {
|
2019-09-04 06:50:21 +08:00
|
|
|
std::stringstream ss;
|
2019-10-26 07:17:18 +08:00
|
|
|
ss << "beginVersion:" << std::to_string(beginVersion) << " endVersion:" << std::to_string(endVersion)
|
|
|
|
<< " blockSize:" << std::to_string(blockSize) << " filename:" << fileName
|
|
|
|
<< " fileSize:" << std::to_string(fileSize)
|
|
|
|
<< " tagId: " << (tagId >= 0 ? std::to_string(tagId) : std::string("(None)"));
|
2019-09-04 06:50:21 +08:00
|
|
|
return ss.str();
|
2018-11-30 02:31:47 +08:00
|
|
|
}
|
2017-11-15 15:33:17 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct RangeFile {
|
|
|
|
Version version;
|
|
|
|
uint32_t blockSize;
|
|
|
|
std::string fileName;
|
|
|
|
int64_t fileSize;
|
|
|
|
|
|
|
|
// Order by version, break ties with name
|
|
|
|
bool operator< (const RangeFile &rhs) const {
|
|
|
|
return version == rhs.version ? fileName < rhs.fileName : version < rhs.version;
|
|
|
|
}
|
2018-11-30 02:31:47 +08:00
|
|
|
|
|
|
|
std::string toString() const {
|
2019-09-04 06:50:21 +08:00
|
|
|
std::stringstream ss;
|
|
|
|
ss << "version:" << std::to_string(version) << " blockSize:" << std::to_string(blockSize) <<
|
|
|
|
" fileName:" << fileName << " fileSize:" << std::to_string(fileSize);
|
|
|
|
return ss.str();
|
2018-11-30 02:31:47 +08:00
|
|
|
}
|
2017-11-15 15:33:17 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct KeyspaceSnapshotFile {
|
|
|
|
Version beginVersion;
|
|
|
|
Version endVersion;
|
|
|
|
std::string fileName;
|
|
|
|
int64_t totalSize;
|
2018-01-17 20:09:43 +08:00
|
|
|
Optional<bool> restorable; // Whether or not the snapshot can be used in a restore, if known
|
2019-03-07 06:14:06 +08:00
|
|
|
bool isSingleVersion() const {
|
|
|
|
return beginVersion == endVersion;
|
|
|
|
}
|
|
|
|
double expiredPct(Optional<Version> expiredEnd) const {
|
|
|
|
double pctExpired = 0;
|
|
|
|
if(expiredEnd.present() && expiredEnd.get() > beginVersion) {
|
|
|
|
if(isSingleVersion()) {
|
|
|
|
pctExpired = 1;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
pctExpired = double(std::min(endVersion, expiredEnd.get()) - beginVersion) / (endVersion - beginVersion);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return pctExpired * 100;
|
|
|
|
}
|
2017-11-15 15:33:17 +08:00
|
|
|
|
|
|
|
// Order by beginVersion, break ties with endVersion
|
|
|
|
bool operator< (const KeyspaceSnapshotFile &rhs) const {
|
|
|
|
return beginVersion == rhs.beginVersion ? endVersion < rhs.endVersion : beginVersion < rhs.beginVersion;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2018-12-22 14:42:29 +08:00
|
|
|
struct BackupFileList {
|
2017-11-19 20:28:22 +08:00
|
|
|
std::vector<RangeFile> ranges;
|
|
|
|
std::vector<LogFile> logs;
|
|
|
|
std::vector<KeyspaceSnapshotFile> snapshots;
|
2018-12-22 14:42:29 +08:00
|
|
|
|
|
|
|
void toStream(FILE *fout) const;
|
2017-11-19 20:28:22 +08:00
|
|
|
};
|
|
|
|
|
2018-01-03 15:22:35 +08:00
|
|
|
// The byte counts here only include usable log files and byte counts from kvrange manifests
|
2017-11-15 15:33:17 +08:00
|
|
|
struct BackupDescription {
|
2018-01-17 20:09:43 +08:00
|
|
|
BackupDescription() : snapshotBytes(0) {}
|
2017-11-15 15:33:17 +08:00
|
|
|
std::string url;
|
|
|
|
std::vector<KeyspaceSnapshotFile> snapshots;
|
2018-01-03 15:22:35 +08:00
|
|
|
int64_t snapshotBytes;
|
2018-12-16 16:18:13 +08:00
|
|
|
// The version before which everything has been deleted by an expire
|
|
|
|
Optional<Version> expiredEndVersion;
|
|
|
|
// The latest version before which at least some data has been deleted by an expire
|
|
|
|
Optional<Version> unreliableEndVersion;
|
|
|
|
// The minimum log version in the backup
|
2017-11-15 15:33:17 +08:00
|
|
|
Optional<Version> minLogBegin;
|
2018-12-16 16:18:13 +08:00
|
|
|
// The maximum log version in the backup
|
2017-11-15 15:33:17 +08:00
|
|
|
Optional<Version> maxLogEnd;
|
2018-12-16 16:18:13 +08:00
|
|
|
// The maximum log version for which there is contiguous log version coverage extending back to minLogBegin
|
2017-11-15 15:33:17 +08:00
|
|
|
Optional<Version> contiguousLogEnd;
|
2018-12-16 16:18:13 +08:00
|
|
|
// The maximum version which this backup can be used to restore to
|
2017-11-15 15:33:17 +08:00
|
|
|
Optional<Version> maxRestorableVersion;
|
2018-12-16 16:18:13 +08:00
|
|
|
// The minimum version which this backup can be used to restore to
|
2017-11-15 15:33:17 +08:00
|
|
|
Optional<Version> minRestorableVersion;
|
|
|
|
std::string extendedDetail; // Freeform container-specific info.
|
2020-04-09 06:50:21 +08:00
|
|
|
bool partitioned; // If this backup contains partitioned mutation logs.
|
2018-01-17 20:09:43 +08:00
|
|
|
|
|
|
|
// Resolves the versions above to timestamps using a given database's TimeKeeper data.
|
|
|
|
// toString will use this information if present.
|
|
|
|
Future<Void> resolveVersionTimes(Database cx);
|
|
|
|
std::map<Version, int64_t> versionTimeMap;
|
|
|
|
|
2017-11-15 15:33:17 +08:00
|
|
|
std::string toString() const;
|
2019-03-07 06:14:06 +08:00
|
|
|
std::string toJSON() const;
|
2017-11-15 15:33:17 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct RestorableFileSet {
|
|
|
|
Version targetVersion;
|
|
|
|
std::vector<LogFile> logs;
|
|
|
|
std::vector<RangeFile> ranges;
|
2020-04-17 06:11:09 +08:00
|
|
|
|
|
|
|
// Range file's key ranges. Can be empty for backups generated before 6.3.
|
|
|
|
std::map<std::string, KeyRange> keyRanges;
|
|
|
|
|
2020-04-17 06:52:20 +08:00
|
|
|
// Mutation logs continuous range [begin, end). Both can be invalidVersion
|
|
|
|
// when the entire key space snapshot is at the target version.
|
2020-04-17 06:11:09 +08:00
|
|
|
Version continuousBeginVersion, continuousEndVersion;
|
|
|
|
|
2019-11-05 03:47:29 +08:00
|
|
|
KeyspaceSnapshotFile snapshot; // Info. for debug purposes
|
2017-11-15 15:33:17 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* IBackupContainer is an interface to a set of backup data, which contains
|
|
|
|
* - backup metadata
|
|
|
|
* - log files
|
|
|
|
* - range files
|
|
|
|
* - keyspace snapshot files defining a complete non overlapping key space snapshot
|
|
|
|
*
|
|
|
|
* Files in a container are identified by a name. This can be any string, whatever
|
|
|
|
* makes sense for the underlying storage system.
|
|
|
|
*
|
|
|
|
* Reading files is done by file name. File names are discovered by getting a RestorableFileSet.
|
|
|
|
*
|
|
|
|
* For remote data stores that are filesystem-like, it's probably best to inherit BackupContainerFileSystem.
|
|
|
|
*/
|
|
|
|
class IBackupContainer {
|
|
|
|
public:
|
|
|
|
virtual void addref() = 0;
|
|
|
|
virtual void delref() = 0;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
IBackupContainer() {}
|
|
|
|
virtual ~IBackupContainer() {}
|
|
|
|
|
2017-11-15 15:33:17 +08:00
|
|
|
// Create the container
|
2017-05-26 04:48:44 +08:00
|
|
|
virtual Future<Void> create() = 0;
|
2018-12-21 10:05:23 +08:00
|
|
|
virtual Future<bool> exists() = 0;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2017-11-15 15:33:17 +08:00
|
|
|
// Open a log file or range file for writing
|
|
|
|
virtual Future<Reference<IBackupFile>> writeLogFile(Version beginVersion, Version endVersion, int blockSize) = 0;
|
2018-11-23 21:23:56 +08:00
|
|
|
virtual Future<Reference<IBackupFile>> writeRangeFile(Version snapshotBeginVersion, int snapshotFileCount, Version fileVersion, int blockSize) = 0;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2019-10-08 06:36:28 +08:00
|
|
|
// Open a tagged log file for writing, where tagId is the log router tag's id.
|
|
|
|
virtual Future<Reference<IBackupFile>> writeTaggedLogFile(Version beginVersion, Version endVersion, int blockSize,
|
2020-02-21 08:28:27 +08:00
|
|
|
uint16_t tagId, int totalTags) = 0;
|
2019-10-08 06:36:28 +08:00
|
|
|
|
2017-11-15 15:33:17 +08:00
|
|
|
// Write a KeyspaceSnapshotFile of range file names representing a full non overlapping
|
|
|
|
// snapshot of the key ranges this backup is targeting.
|
2020-04-16 14:08:19 +08:00
|
|
|
virtual Future<Void> writeKeyspaceSnapshotFile(const std::vector<std::string>& fileNames,
|
|
|
|
const std::vector<std::pair<Key, Key>>& beginEndKeys,
|
|
|
|
int64_t totalBytes) = 0;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2017-11-15 15:33:17 +08:00
|
|
|
// Open a file for read by name
|
2020-10-22 13:19:15 +08:00
|
|
|
virtual Future<Reference<IAsyncFile>> readFile(const std::string& name) = 0;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-04-22 02:38:45 +08:00
|
|
|
// Returns the key ranges in the snapshot file. This is an expensive function
|
|
|
|
// and should only be used in simulation for sanity check.
|
|
|
|
virtual Future<KeyRange> getSnapshotFileKeyRange(const RangeFile& file) = 0;
|
|
|
|
|
2018-12-20 16:23:26 +08:00
|
|
|
struct ExpireProgress {
|
|
|
|
std::string step;
|
|
|
|
int total;
|
|
|
|
int done;
|
|
|
|
std::string toString() const;
|
|
|
|
};
|
2018-01-17 20:09:43 +08:00
|
|
|
// Delete backup files which do not contain any data at or after (more recent than) expireEndVersion.
|
|
|
|
// If force is false, then nothing will be deleted unless there is a restorable snapshot which
|
|
|
|
// - begins at or after expireEndVersion
|
|
|
|
// - ends at or before restorableBeginVersion
|
|
|
|
// If force is true, data is deleted unconditionally which could leave the backup in an unusable state. This is not recommended.
|
|
|
|
// Returns true if expiration was done.
|
2018-12-20 16:23:26 +08:00
|
|
|
virtual Future<Void> expireData(Version expireEndVersion, bool force = false, ExpireProgress *progress = nullptr, Version restorableBeginVersion = std::numeric_limits<Version>::max()) = 0;
|
2017-11-15 15:33:17 +08:00
|
|
|
|
|
|
|
// Delete entire container. During the process, if pNumDeleted is not null it will be
|
|
|
|
// updated with the count of deleted files so that progress can be seen.
|
|
|
|
virtual Future<Void> deleteContainer(int *pNumDeleted = nullptr) = 0;
|
|
|
|
|
2018-12-18 20:33:37 +08:00
|
|
|
// Return key details about a backup's contents.
|
|
|
|
// Unless deepScan is true, use cached metadata, if present, as initial contiguous available log range.
|
|
|
|
// If logStartVersionOverride is given, log data prior to that version will be ignored for the purposes
|
|
|
|
// of this describe operation. This can be used to calculate what the restorability of a backup would
|
|
|
|
// be after deleting all data prior to logStartVersionOverride.
|
|
|
|
virtual Future<BackupDescription> describeBackup(bool deepScan = false, Version logStartVersionOverride = invalidVersion) = 0;
|
2017-11-15 15:33:17 +08:00
|
|
|
|
2018-12-22 14:42:29 +08:00
|
|
|
virtual Future<BackupFileList> dumpFileList(Version begin = 0, Version end = std::numeric_limits<Version>::max()) = 0;
|
2017-11-19 20:28:22 +08:00
|
|
|
|
2020-08-30 10:53:04 +08:00
|
|
|
// Get exactly the files necessary to restore the key space filtered by the specified key ranges to targetVersion.
|
2020-09-25 08:27:05 +08:00
|
|
|
// If targetVersion is 'latestVersion', use the minimum restorable version in a snapshot.
|
|
|
|
// If logsOnly is set, only use log files in [beginVersion, targetVervions) in restore set.
|
|
|
|
// Returns non-present if restoring to the given version is not possible.
|
2020-08-30 10:53:04 +08:00
|
|
|
virtual Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion,
|
2020-09-25 08:27:05 +08:00
|
|
|
VectorRef<KeyRangeRef> keyRangesFilter = {},
|
|
|
|
bool logsOnly = false, Version beginVersion = -1) = 0;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
// Get an IBackupContainer based on a container spec string
|
2020-10-22 13:19:15 +08:00
|
|
|
static Reference<IBackupContainer> openContainer(const std::string& url);
|
2017-11-15 15:33:17 +08:00
|
|
|
static std::vector<std::string> getURLFormats();
|
2020-10-22 13:19:15 +08:00
|
|
|
static Future<std::vector<std::string>> listContainers(const std::string& baseURL);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2017-11-15 15:33:17 +08:00
|
|
|
std::string getURL() const {
|
|
|
|
return URL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static std::string lastOpenError;
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::string URL;
|
2017-05-26 04:48:44 +08:00
|
|
|
};
|
|
|
|
|
2020-10-11 11:10:12 +08:00
|
|
|
#endif
|