357 lines
15 KiB
C++
357 lines
15 KiB
C++
/*
|
|
* BackupContainer.h
|
|
*
|
|
* This source file is part of the FoundationDB open source project
|
|
*
|
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifndef FDBCLIENT_BACKUP_CONTAINER_H
|
|
#define FDBCLIENT_BACKUP_CONTAINER_H
|
|
#pragma once
|
|
|
|
#include "flow/flow.h"
|
|
#include "fdbrpc/IAsyncFile.h"
|
|
#include "fdbclient/FDBTypes.h"
|
|
#include "fdbclient/NativeAPI.actor.h"
|
|
#include "fdbclient/ReadYourWrites.h"
|
|
#include <vector>
|
|
|
|
class ReadYourWritesTransaction;
|
|
|
|
Future<Optional<int64_t>> timeKeeperEpochsFromVersion(Version const& v, Reference<ReadYourWritesTransaction> const& tr);
|
|
Future<Version> timeKeeperVersionFromDatetime(std::string const& datetime, Database const& db);
|
|
|
|
// Append-only file interface for writing backup data
|
|
// Once finish() is called the file cannot be further written to.
|
|
// Backup containers should not attempt to use files for which finish was not called or did not complete.
|
|
// TODO: Move the log file and range file format encoding/decoding stuff to this file and behind interfaces.
|
|
class IBackupFile {
|
|
public:
|
|
IBackupFile(const std::string& fileName) : m_fileName(fileName) {}
|
|
virtual ~IBackupFile() {}
|
|
// Backup files are append-only and cannot have more than 1 append outstanding at once.
|
|
virtual Future<Void> append(const void* data, int len) = 0;
|
|
virtual Future<Void> finish() = 0;
|
|
inline std::string getFileName() const { return m_fileName; }
|
|
virtual int64_t size() const = 0;
|
|
virtual void addref() = 0;
|
|
virtual void delref() = 0;
|
|
|
|
Future<Void> appendStringRefWithLen(Standalone<StringRef> s);
|
|
|
|
protected:
|
|
std::string m_fileName;
|
|
};
|
|
|
|
// Structures for various backup components
|
|
|
|
// Mutation log version written by old FileBackupAgent
|
|
static const uint32_t BACKUP_AGENT_MLOG_VERSION = 2001;
|
|
|
|
// Mutation log version written by BackupWorker
|
|
static const uint32_t PARTITIONED_MLOG_VERSION = 4110;
|
|
|
|
// Snapshot file version written by FileBackupAgent
|
|
static const uint32_t BACKUP_AGENT_SNAPSHOT_FILE_VERSION = 1001;
|
|
|
|
struct LogFile {
|
|
Version beginVersion;
|
|
Version endVersion;
|
|
uint32_t blockSize;
|
|
std::string fileName;
|
|
int64_t fileSize;
|
|
int tagId = -1; // Log router tag. Non-negative for new backup format.
|
|
int totalTags = -1; // Total number of log router tags.
|
|
|
|
// Order by beginVersion, break ties with endVersion
|
|
bool operator<(const LogFile& rhs) const {
|
|
return beginVersion == rhs.beginVersion ? endVersion < rhs.endVersion : beginVersion < rhs.beginVersion;
|
|
}
|
|
|
|
// Returns if this log file contains a subset of content of the given file
|
|
// by comparing version range and tag ID.
|
|
bool isSubset(const LogFile& rhs) const {
|
|
return beginVersion >= rhs.beginVersion && endVersion <= rhs.endVersion && tagId == rhs.tagId;
|
|
}
|
|
|
|
bool isPartitionedLog() const { return tagId >= 0 && tagId < totalTags; }
|
|
|
|
std::string toString() const {
|
|
std::stringstream ss;
|
|
ss << "beginVersion:" << std::to_string(beginVersion) << " endVersion:" << std::to_string(endVersion)
|
|
<< " blockSize:" << std::to_string(blockSize) << " filename:" << fileName
|
|
<< " fileSize:" << std::to_string(fileSize)
|
|
<< " tagId: " << (tagId >= 0 ? std::to_string(tagId) : std::string("(None)"));
|
|
return ss.str();
|
|
}
|
|
};
|
|
|
|
struct RangeFile {
|
|
Version version;
|
|
uint32_t blockSize;
|
|
std::string fileName;
|
|
int64_t fileSize;
|
|
|
|
// Order by version, break ties with name
|
|
bool operator<(const RangeFile& rhs) const {
|
|
return version == rhs.version ? fileName < rhs.fileName : version < rhs.version;
|
|
}
|
|
|
|
std::string toString() const {
|
|
std::stringstream ss;
|
|
ss << "version:" << std::to_string(version) << " blockSize:" << std::to_string(blockSize)
|
|
<< " fileName:" << fileName << " fileSize:" << std::to_string(fileSize);
|
|
return ss.str();
|
|
}
|
|
};
|
|
|
|
struct KeyspaceSnapshotFile {
|
|
Version beginVersion;
|
|
Version endVersion;
|
|
std::string fileName;
|
|
int64_t totalSize;
|
|
Optional<bool> restorable; // Whether or not the snapshot can be used in a restore, if known
|
|
bool isSingleVersion() const { return beginVersion == endVersion; }
|
|
double expiredPct(Optional<Version> expiredEnd) const {
|
|
double pctExpired = 0;
|
|
if (expiredEnd.present() && expiredEnd.get() > beginVersion) {
|
|
if (isSingleVersion()) {
|
|
pctExpired = 1;
|
|
} else {
|
|
pctExpired =
|
|
double(std::min(endVersion, expiredEnd.get()) - beginVersion) / (endVersion - beginVersion);
|
|
}
|
|
}
|
|
return pctExpired * 100;
|
|
}
|
|
|
|
// Order by beginVersion, break ties with endVersion
|
|
bool operator<(const KeyspaceSnapshotFile& rhs) const {
|
|
return beginVersion == rhs.beginVersion ? endVersion < rhs.endVersion : beginVersion < rhs.beginVersion;
|
|
}
|
|
};
|
|
|
|
struct BackupFileList {
|
|
std::vector<RangeFile> ranges;
|
|
std::vector<LogFile> logs;
|
|
std::vector<KeyspaceSnapshotFile> snapshots;
|
|
|
|
void toStream(FILE* fout) const;
|
|
};
|
|
|
|
// The byte counts here only include usable log files and byte counts from kvrange manifests
|
|
struct BackupDescription {
|
|
BackupDescription() : snapshotBytes(0) {}
|
|
std::string url;
|
|
Optional<std::string> proxy;
|
|
std::vector<KeyspaceSnapshotFile> snapshots;
|
|
int64_t snapshotBytes;
|
|
// The version before which everything has been deleted by an expire
|
|
Optional<Version> expiredEndVersion;
|
|
// The latest version before which at least some data has been deleted by an expire
|
|
Optional<Version> unreliableEndVersion;
|
|
// The minimum log version in the backup
|
|
Optional<Version> minLogBegin;
|
|
// The maximum log version in the backup
|
|
Optional<Version> maxLogEnd;
|
|
// The maximum log version for which there is contiguous log version coverage extending back to minLogBegin
|
|
Optional<Version> contiguousLogEnd;
|
|
// The maximum version which this backup can be used to restore to
|
|
Optional<Version> maxRestorableVersion;
|
|
// The minimum version which this backup can be used to restore to
|
|
Optional<Version> minRestorableVersion;
|
|
std::string extendedDetail; // Freeform container-specific info.
|
|
bool partitioned; // If this backup contains partitioned mutation logs.
|
|
|
|
// Resolves the versions above to timestamps using a given database's TimeKeeper data.
|
|
// toString will use this information if present.
|
|
Future<Void> resolveVersionTimes(Database cx);
|
|
std::map<Version, int64_t> versionTimeMap;
|
|
|
|
std::string toString() const;
|
|
std::string toJSON() const;
|
|
};
|
|
|
|
struct RestorableFileSet {
|
|
Version targetVersion;
|
|
std::vector<LogFile> logs;
|
|
std::vector<RangeFile> ranges;
|
|
|
|
// Range file's key ranges. Can be empty for backups generated before 6.3.
|
|
std::map<std::string, KeyRange> keyRanges;
|
|
|
|
// Mutation logs continuous range [begin, end). Both can be invalidVersion
|
|
// when the entire key space snapshot is at the target version.
|
|
Version continuousBeginVersion, continuousEndVersion;
|
|
|
|
KeyspaceSnapshotFile snapshot; // Info. for debug purposes
|
|
};
|
|
|
|
/* IBackupContainer is an interface to a set of backup data, which contains
|
|
* - backup metadata
|
|
* - log files
|
|
* - range files
|
|
* - keyspace snapshot files defining a complete non overlapping key space snapshot
|
|
*
|
|
* Files in a container are identified by a name. This can be any string, whatever
|
|
* makes sense for the underlying storage system.
|
|
*
|
|
* Reading files is done by file name. File names are discovered by getting a RestorableFileSet.
|
|
*
|
|
* For remote data stores that are filesystem-like, it's probably best to inherit BackupContainerFileSystem.
|
|
*/
|
|
class IBackupContainer {
|
|
public:
|
|
virtual void addref() = 0;
|
|
virtual void delref() = 0;
|
|
|
|
IBackupContainer() {}
|
|
virtual ~IBackupContainer() {}
|
|
|
|
// Create the container
|
|
virtual Future<Void> create() = 0;
|
|
virtual Future<bool> exists() = 0;
|
|
|
|
// Open a log file or range file for writing
|
|
virtual Future<Reference<IBackupFile>> writeLogFile(Version beginVersion, Version endVersion, int blockSize) = 0;
|
|
virtual Future<Reference<IBackupFile>> writeRangeFile(Version snapshotBeginVersion,
|
|
int snapshotFileCount,
|
|
Version fileVersion,
|
|
int blockSize) = 0;
|
|
|
|
// Open a tagged log file for writing, where tagId is the log router tag's id.
|
|
virtual Future<Reference<IBackupFile>> writeTaggedLogFile(Version beginVersion,
|
|
Version endVersion,
|
|
int blockSize,
|
|
uint16_t tagId,
|
|
int totalTags) = 0;
|
|
|
|
// Write a KeyspaceSnapshotFile of range file names representing a full non overlapping
|
|
// snapshot of the key ranges this backup is targeting.
|
|
virtual Future<Void> writeKeyspaceSnapshotFile(const std::vector<std::string>& fileNames,
|
|
const std::vector<std::pair<Key, Key>>& beginEndKeys,
|
|
int64_t totalBytes) = 0;
|
|
|
|
// Open a file for read by name
|
|
virtual Future<Reference<IAsyncFile>> readFile(const std::string& name) = 0;
|
|
|
|
// Returns the key ranges in the snapshot file. This is an expensive function
|
|
// and should only be used in simulation for sanity check.
|
|
virtual Future<KeyRange> getSnapshotFileKeyRange(const RangeFile& file) = 0;
|
|
|
|
struct ExpireProgress {
|
|
std::string step;
|
|
int total;
|
|
int done;
|
|
std::string toString() const;
|
|
};
|
|
// Delete backup files which do not contain any data at or after (more recent than) expireEndVersion.
|
|
// If force is false, then nothing will be deleted unless there is a restorable snapshot which
|
|
// - begins at or after expireEndVersion
|
|
// - ends at or before restorableBeginVersion
|
|
// If force is true, data is deleted unconditionally which could leave the backup in an unusable state. This is not
|
|
// recommended. Returns true if expiration was done.
|
|
virtual Future<Void> expireData(Version expireEndVersion,
|
|
bool force = false,
|
|
ExpireProgress* progress = nullptr,
|
|
Version restorableBeginVersion = std::numeric_limits<Version>::max()) = 0;
|
|
|
|
// Delete entire container. During the process, if pNumDeleted is not null it will be
|
|
// updated with the count of deleted files so that progress can be seen.
|
|
virtual Future<Void> deleteContainer(int* pNumDeleted = nullptr) = 0;
|
|
|
|
// Return key details about a backup's contents.
|
|
// Unless deepScan is true, use cached metadata, if present, as initial contiguous available log range.
|
|
// If logStartVersionOverride is given, log data prior to that version will be ignored for the purposes
|
|
// of this describe operation. This can be used to calculate what the restorability of a backup would
|
|
// be after deleting all data prior to logStartVersionOverride.
|
|
virtual Future<BackupDescription> describeBackup(bool deepScan = false,
|
|
Version logStartVersionOverride = invalidVersion) = 0;
|
|
|
|
virtual Future<BackupFileList> dumpFileList(Version begin = 0,
|
|
Version end = std::numeric_limits<Version>::max()) = 0;
|
|
|
|
// Get exactly the files necessary to restore the key space filtered by the specified key ranges to targetVersion.
|
|
// If targetVersion is 'latestVersion', use the minimum restorable version in a snapshot.
|
|
// If logsOnly is set, only use log files in [beginVersion, targetVervions) in restore set.
|
|
// Returns non-present if restoring to the given version is not possible.
|
|
virtual Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion,
|
|
VectorRef<KeyRangeRef> keyRangesFilter = {},
|
|
bool logsOnly = false,
|
|
Version beginVersion = -1) = 0;
|
|
|
|
// Get an IBackupContainer based on a container spec string
|
|
static Reference<IBackupContainer> openContainer(const std::string& url,
|
|
const Optional<std::string>& proxy,
|
|
const Optional<std::string>& encryptionKeyFileName);
|
|
static std::vector<std::string> getURLFormats();
|
|
static Future<std::vector<std::string>> listContainers(const std::string& baseURL,
|
|
const Optional<std::string>& proxy);
|
|
|
|
std::string const& getURL() const { return URL; }
|
|
Optional<std::string> const& getProxy() const { return proxy; }
|
|
Optional<std::string> const& getEncryptionKeyFileName() const { return encryptionKeyFileName; }
|
|
|
|
static std::string lastOpenError;
|
|
|
|
// TODO: change the following back to `private` once blob obj access is refactored
|
|
protected:
|
|
std::string URL;
|
|
Optional<std::string> proxy;
|
|
Optional<std::string> encryptionKeyFileName;
|
|
};
|
|
|
|
namespace fileBackup {
|
|
// Accumulates mutation log value chunks, as both a vector of chunks and as a combined chunk,
|
|
// in chunk order, and can check the chunk set for completion or intersection with a set
|
|
// of ranges.
|
|
struct AccumulatedMutations {
|
|
AccumulatedMutations() : lastChunkNumber(-1) {}
|
|
|
|
// Add a KV pair for this mutation chunk set
|
|
// It will be accumulated onto serializedMutations if the chunk number is
|
|
// the next expected value.
|
|
void addChunk(int chunkNumber, const KeyValueRef& kv);
|
|
|
|
// Returns true if both
|
|
// - 1 or more chunks were added to this set
|
|
// - The header of the first chunk contains a valid protocol version and a length
|
|
// that matches the bytes after the header in the combined value in serializedMutations
|
|
bool isComplete() const;
|
|
|
|
// Returns true if a complete chunk contains any MutationRefs which intersect with any
|
|
// range in ranges.
|
|
// It is undefined behavior to run this if isComplete() does not return true.
|
|
bool matchesAnyRange(const std::vector<KeyRange>& ranges) const;
|
|
|
|
std::vector<KeyValueRef> kvs;
|
|
std::string serializedMutations;
|
|
int lastChunkNumber;
|
|
};
|
|
|
|
// Decodes a mutation log key, which contains (hash, commitVersion, chunkNumber) and
|
|
// returns (commitVersion, chunkNumber)
|
|
std::pair<Version, int32_t> decodeMutationLogKey(const StringRef& key);
|
|
|
|
// Decodes an encoded list of mutations in the format of:
|
|
// [includeVersion:uint64_t][val_length:uint32_t][mutation_1][mutation_2]...[mutation_k],
|
|
// where a mutation is encoded as:
|
|
// [type:uint32_t][keyLength:uint32_t][valueLength:uint32_t][param1][param2]
|
|
std::vector<MutationRef> decodeMutationLogValue(const StringRef& value);
|
|
} // namespace fileBackup
|
|
|
|
#endif
|