/*
 * BackupContainerFileSystem.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fdbclient/BackupAgent.actor.h"
#include "fdbclient/BackupContainer.h"
#include "flow/BooleanParam.h"
#ifdef BUILD_AZURE_BACKUP
#include "fdbclient/BackupContainerAzureBlobStore.h"
#endif
#include "fdbclient/BackupContainerFileSystem.h"
#include "fdbclient/BackupContainerLocalDirectory.h"
#include "fdbclient/BackupContainerS3BlobStore.h"
#include "fdbclient/JsonBuilder.h"
#include "flow/StreamCipher.h"
#include "flow/UnitTest.h"

#include <algorithm>
#include <cinttypes>

#include "flow/actorcompiler.h" // This must be the last #include.

class BackupContainerFileSystemImpl {
public:
    // TODO: Do this more efficiently, as the range file list for a snapshot could potentially be hundreds of
    // megabytes.
    ACTOR static Future<std::pair<std::vector<RangeFile>, std::map<std::string, KeyRange>>> readKeyspaceSnapshot(
        Reference<BackupContainerFileSystem> bc,
        KeyspaceSnapshotFile snapshot) {
        // Read the range file list for the specified version range, and then index them by fileName.
        // This is so we can verify that each of the files listed in the manifest file is also in the container at
        // this time.
        std::vector<RangeFile> files = wait(bc->listRangeFiles(snapshot.beginVersion, snapshot.endVersion));
        state std::map<std::string, RangeFile> rangeIndex;
        for (auto& f : files)
            rangeIndex[f.fileName] = std::move(f);

        // Read the snapshot file, verify the version range, then find each of the range files by name in the index
        // and return them.
        state Reference<IAsyncFile> f = wait(bc->readFile(snapshot.fileName));
        int64_t size = wait(f->size());
        state Standalone<StringRef> buf = makeString(size);
        wait(success(f->read(mutateString(buf), buf.size(), 0)));
        json_spirit::mValue json;
        json_spirit::read_string(buf.toString(), json);
        JSONDoc doc(json);

        Version v;
        if (!doc.tryGet("beginVersion", v) || v != snapshot.beginVersion)
            throw restore_corrupted_data();
        if (!doc.tryGet("endVersion", v) || v != snapshot.endVersion)
            throw restore_corrupted_data();

        json_spirit::mValue& filesArray = doc.create("files");
        if (filesArray.type() != json_spirit::array_type)
            throw restore_corrupted_data();

        std::vector<RangeFile> results;
        int missing = 0;

        for (auto const& fileValue : filesArray.get_array()) {
            if (fileValue.type() != json_spirit::str_type)
                throw restore_corrupted_data();

            // If the file is not in the index then log the error but don't throw yet, keep checking the whole list.
            auto i = rangeIndex.find(fileValue.get_str());
            if (i == rangeIndex.end()) {
                TraceEvent(SevError, "FileRestoreMissingRangeFile")
                    .detail("URL", bc->getURL())
                    .detail("File", fileValue.get_str());

                ++missing;
            }

            // No point in using more memory once data is missing since an error will be thrown instead.
            if (missing == 0) {
                results.push_back(i->second);
            }
        }

        if (missing > 0) {
            TraceEvent(SevError, "FileRestoreMissingRangeFileSummary")
                .detail("URL", bc->getURL())
                .detail("Count", missing);

            throw restore_missing_data();
        }

        // Check key ranges for files
        std::map<std::string, KeyRange> fileKeyRanges;
        JSONDoc ranges = doc.subDoc("keyRanges"); // Creates an empty doc if it does not exist
        for (auto i : ranges.obj()) {
            const std::string& filename = i.first;
            JSONDoc fields(i.second);
            std::string begin, end;
            if (fields.tryGet("beginKey", begin) && fields.tryGet("endKey", end)) {
                TraceEvent("ManifestFields")
                    .detail("File", filename)
                    .detail("Begin", printable(StringRef(begin)))
                    .detail("End", printable(StringRef(end)));
                fileKeyRanges.emplace(filename, KeyRange(KeyRangeRef(StringRef(begin), StringRef(end))));
            } else {
                TraceEvent("MalFormattedManifest").detail("Key", filename);
                throw restore_corrupted_data();
            }
        }

        return std::make_pair(results, fileKeyRanges);
    }

    // Backup log types
    static constexpr Version NON_PARTITIONED_MUTATION_LOG = 0;
    static constexpr Version PARTITIONED_MUTATION_LOG = 1;
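    // Note (informal): non-partitioned logs are produced by backup agents as a single stream, while partitioned
    // logs are written per tag by backup workers; the latter require the cross-partition continuity analysis
    // implemented below before a version range can be considered restorable.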

    // Find what should be the filename of a path by finding whatever is after the last forward or backward slash,
    // or failing to find those, the whole string.
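    // For example, "plogs/0000000/1000/log,1000,2000,fdb1,1-of-2,1048576" yields
    // "log,1000,2000,fdb1,1-of-2,1048576" (the names in this example are illustrative).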
    static std::string fileNameOnly(const std::string& path) {
        // Find the last forward slash position, defaulting to 0 if not found
        int pos = path.find_last_of('/');
        if (pos == std::string::npos) {
            pos = 0;
        }
        // Find the last backward slash position at or before pos, and update pos if found
        int b = path.find_last_of('\\', pos);
        if (b != std::string::npos) {
            pos = b;
        }
        return path.substr(pos + 1);
    }
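
    // Parse a range file name of the form "range,<version>,<token>,<blockSize>"
    // (e.g. "range,12345,fdb1,1048576", names illustrative); returns false if the name does not match.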
    static bool pathToRangeFile(RangeFile& out, const std::string& path, int64_t size) {
        std::string name = fileNameOnly(path);
        RangeFile f;
        f.fileName = path;
        f.fileSize = size;
        int len;
        if (sscanf(name.c_str(), "range,%" SCNd64 ",%*[^,],%u%n", &f.version, &f.blockSize, &len) == 2 &&
            len == name.size()) {
            out = f;
            return true;
        }
        return false;
    }
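
    // Writes the keyspace snapshot manifest, a JSON document. A sketch of the shape produced below, with
    // illustrative values:
    //   { "files": ["range,100,fdb1,1048576", ...], "totalBytes": 1234, "beginVersion": 100, "endVersion": 200,
    //     "keyRanges": { "range,100,fdb1,1048576": { "beginKey": "a", "endKey": "b" } } }
    // "keyRanges" is only present when includeKeyRangeMap is set.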
    ACTOR static Future<Void> writeKeyspaceSnapshotFile(Reference<BackupContainerFileSystem> bc,
                                                        std::vector<std::string> fileNames,
                                                        std::vector<std::pair<Key, Key>> beginEndKeys,
                                                        int64_t totalBytes,
                                                        IncludeKeyRangeMap includeKeyRangeMap) {
        ASSERT(!fileNames.empty() && fileNames.size() == beginEndKeys.size());

        state Version minVer = std::numeric_limits<Version>::max();
        state Version maxVer = 0;
        state RangeFile rf;
        state json_spirit::mArray fileArray;

        // Validate each filename, update version range
        for (const auto& f : fileNames) {
            if (pathToRangeFile(rf, f, 0)) {
                fileArray.push_back(f);
                if (rf.version < minVer)
                    minVer = rf.version;
                if (rf.version > maxVer)
                    maxVer = rf.version;
            } else
                throw restore_unknown_file_type();
            wait(yield());
        }

        state json_spirit::mValue json;
        state JSONDoc doc(json);

        doc.create("files") = std::move(fileArray);
        doc.create("totalBytes") = totalBytes;
        doc.create("beginVersion") = minVer;
        doc.create("endVersion") = maxVer;

        if (includeKeyRangeMap) {
            auto ranges = doc.subDoc("keyRanges");
            for (int i = 0; i < beginEndKeys.size(); i++) {
                auto fileDoc = ranges.subDoc(fileNames[i], /*split=*/false);
                fileDoc.create("beginKey") = beginEndKeys[i].first.toString();
                fileDoc.create("endKey") = beginEndKeys[i].second.toString();
            }
        }

        wait(yield());
        state std::string docString = json_spirit::write_string(json);

        state Reference<IBackupFile> f =
            wait(bc->writeFile(format("snapshots/snapshot,%lld,%lld,%lld", minVer, maxVer, totalBytes)));
        wait(f->append(docString.data(), docString.size()));
        wait(f->finish());

        return Void();
    }

    ACTOR static Future<BackupFileList> dumpFileList(Reference<BackupContainerFileSystem> bc,
                                                     Version begin,
                                                     Version end) {
        state Future<std::vector<RangeFile>> fRanges = bc->listRangeFiles(begin, end);
        state Future<std::vector<KeyspaceSnapshotFile>> fSnapshots = bc->listKeyspaceSnapshots(begin, end);
        state std::vector<LogFile> logs;
        state std::vector<LogFile> pLogs;

        wait(success(fRanges) && success(fSnapshots) && store(logs, bc->listLogFiles(begin, end, false)) &&
             store(pLogs, bc->listLogFiles(begin, end, true)));
        logs.insert(logs.end(), std::make_move_iterator(pLogs.begin()), std::make_move_iterator(pLogs.end()));

        return BackupFileList({ fRanges.get(), std::move(logs), fSnapshots.get() });
    }
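
    // Resolve "v" against "max": invalidVersion is rejected, and a negative v is interpreted as relative to
    // max (e.g. v == -1 with max == 100 resolves to 99).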
    static Version resolveRelativeVersion(Optional<Version> max, Version v, const char* name, Error e) {
        if (v == invalidVersion) {
            TraceEvent(SevError, "BackupExpireInvalidVersion").detail(name, v);
            throw e;
        }
        if (v < 0) {
            if (!max.present()) {
                TraceEvent(SevError, "BackupExpireCannotResolveRelativeVersion").detail(name, v);
                throw e;
            }
            v += max.get();
        }
        return v;
    }

    // For a list of log files specified by their indices (of the same tag),
    // returns whether they are continuous in the range [begin, end]. If "tags" is not
    // nullptr, then it will be populated with [begin, end] -> tags, where the next
    // pair's begin <= previous pair's end + 1. On return, the last pair's end
    // version (inclusive) gives the continuous range from begin.
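    //
    // Worked example (illustrative): with begin = 1 and end = 19, files covering [1, 10) and [10, 20) whose
    // totalTags changes from 4 to 5 at version 10 yield tags = { [1, 9] -> 4, [10, 19] -> 5 } and return true.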
    static bool isContinuous(const std::vector<LogFile>& files,
                             const std::vector<int>& indices,
                             Version begin,
                             Version end,
                             std::map<std::pair<Version, Version>, int>* tags) {
        Version lastBegin = invalidVersion;
        Version lastEnd = invalidVersion;
        int lastTags = -1;

        ASSERT(tags == nullptr || tags->empty());
        for (int idx : indices) {
            const LogFile& file = files[idx];
            if (lastEnd == invalidVersion) {
                if (file.beginVersion > begin)
                    return false;
                if (file.endVersion > begin) {
                    lastBegin = begin;
                    lastTags = file.totalTags;
                } else {
                    continue;
                }
            } else if (lastEnd < file.beginVersion) {
                if (tags != nullptr) {
                    tags->emplace(std::make_pair(lastBegin, lastEnd - 1), lastTags);
                }
                return false;
            }

            if (lastTags != file.totalTags) {
                if (tags != nullptr) {
                    tags->emplace(std::make_pair(lastBegin, file.beginVersion - 1), lastTags);
                }
                lastBegin = file.beginVersion;
                lastTags = file.totalTags;
            }
            lastEnd = file.endVersion;
            if (lastEnd > end)
                break;
        }
        if (tags != nullptr && lastBegin != invalidVersion) {
            tags->emplace(std::make_pair(lastBegin, std::min(end, lastEnd - 1)), lastTags);
        }
        return lastBegin != invalidVersion && lastEnd > end;
    }

    // Returns the end version such that [begin, end] is continuous.
    // "logs" should be already sorted.
    static Version getPartitionedLogsContinuousEndVersion(const std::vector<LogFile>& logs, Version begin) {
        Version end = 0;

        std::map<int, std::vector<int>> tagIndices; // tagId -> indices in files
        for (int i = 0; i < logs.size(); i++) {
            ASSERT_GE(logs[i].tagId, 0);
            ASSERT_LT(logs[i].tagId, logs[i].totalTags);
            auto& indices = tagIndices[logs[i].tagId];
            // filter out if indices.back() is subset of files[i] or vice versa
            if (!indices.empty()) {
                if (logs[indices.back()].isSubset(logs[i])) {
                    ASSERT_LE(logs[indices.back()].fileSize, logs[i].fileSize);
                    indices.back() = i;
                } else if (!logs[i].isSubset(logs[indices.back()])) {
                    indices.push_back(i);
                }
            } else {
                indices.push_back(i);
            }
            end = std::max(end, logs[i].endVersion - 1);
        }
        TraceEvent("ContinuousLogEnd").detail("Begin", begin).detail("InitVersion", end);

        // check partition 0 is continuous in [begin, end] and create a map of ranges to partitions
        std::map<std::pair<Version, Version>, int> tags; // range [start, end] -> partitions
        isContinuous(logs, tagIndices[0], begin, end, &tags);
        if (tags.empty() || end <= begin)
            return 0;
        end = std::min(end, tags.rbegin()->first.second);
        TraceEvent("ContinuousLogEnd").detail("Partition", 0).detail("EndVersion", end).detail("Begin", begin);

        // for each range in tags, check all partitions from 1 are continuous
        Version lastEnd = begin;
        for (const auto& [beginEnd, count] : tags) {
            Version tagEnd = beginEnd.second; // This range's minimum continuous partition version
            for (int i = 1; i < count; i++) {
                std::map<std::pair<Version, Version>, int> rangeTags;
                isContinuous(logs, tagIndices[i], beginEnd.first, beginEnd.second, &rangeTags);
                tagEnd = rangeTags.empty() ? 0 : std::min(tagEnd, rangeTags.rbegin()->first.second);
                TraceEvent("ContinuousLogEnd")
                    .detail("Partition", i)
                    .detail("EndVersion", tagEnd)
                    .detail("RangeBegin", beginEnd.first)
                    .detail("RangeEnd", beginEnd.second);
                if (tagEnd == 0)
                    return lastEnd == begin ? 0 : lastEnd;
            }
            if (tagEnd < beginEnd.second) {
                return tagEnd;
            }
            lastEnd = beginEnd.second;
        }

        return end;
    }

    // Analyze partitioned logs and set contiguousLogEnd for "desc" if larger
    // than the "scanBegin" version.
    static void updatePartitionedLogsContinuousEnd(BackupDescription* desc,
                                                   const std::vector<LogFile>& logs,
                                                   const Version scanBegin,
                                                   const Version scanEnd) {
        if (logs.empty())
            return;

        Version snapshotBeginVersion = desc->snapshots.size() > 0 ? desc->snapshots[0].beginVersion : invalidVersion;
        Version begin = std::max(scanBegin, desc->minLogBegin.get());
        TraceEvent("ContinuousLogEnd")
            .detail("ScanBegin", scanBegin)
            .detail("ScanEnd", scanEnd)
            .detail("Begin", begin)
            .detail("ContiguousLogEnd", desc->contiguousLogEnd.get());
        for (const auto& file : logs) {
            if (file.beginVersion > begin) {
                if (scanBegin > 0)
                    return;

                // scanBegin is 0
                desc->minLogBegin = file.beginVersion;
                begin = file.beginVersion;
            }

            Version ver = getPartitionedLogsContinuousEndVersion(logs, begin);
            if (ver >= desc->contiguousLogEnd.get()) {
                // contiguousLogEnd is not inclusive, so +1 here.
                desc->contiguousLogEnd.get() = ver + 1;
                TraceEvent("UpdateContinuousLogEnd").detail("Version", ver + 1);
                if (ver > snapshotBeginVersion)
                    return;
            }
        }
    }

    // Computes the continuous end version for non-partitioned mutation logs up to
    // the "targetVersion". If "outLogs" is not nullptr, it will be updated with
    // continuous log files. "*end" is updated with the continuous end version.
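    // As used by the callers below, "logs" is non-empty and sorted by version, and *end is seeded with the
    // initial continuous end (e.g. the first file's end version).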
    static void computeRestoreEndVersion(const std::vector<LogFile>& logs,
                                         std::vector<LogFile>* outLogs,
                                         Version* end,
                                         Version targetVersion) {
        auto i = logs.begin();
        if (outLogs != nullptr) {
            outLogs->push_back(*i);
            ++i; // skip the first file
        }

        // Add logs to restorable logs set until continuity is broken OR we reach targetVersion
        while (i != logs.end()) {
            if (i->beginVersion > *end || i->beginVersion > targetVersion)
                break;

            // If the next link in the log chain is found, update the end
            if (i->beginVersion == *end) {
                if (outLogs != nullptr)
                    outLogs->push_back(*i);
                *end = i->endVersion;
            }
            ++i;
        }
    }
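
    // Build a BackupDescription for the container: read cached metadata, list snapshot and log files from the
    // reliable region onward, compute the contiguous log range, and mark each snapshot restorable or not.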
    ACTOR static Future<BackupDescription> describeBackup(Reference<BackupContainerFileSystem> bc,
                                                          bool deepScan,
                                                          Version logStartVersionOverride) {
        state BackupDescription desc;
        desc.url = bc->getURL();
        desc.proxy = bc->getProxy();

        TraceEvent("BackupContainerDescribe1")
            .detail("URL", bc->getURL())
            .detail("LogStartVersionOverride", logStartVersionOverride);

        bool e = wait(bc->exists());
        if (!e) {
            TraceEvent(SevWarnAlways, "BackupContainerDoesNotExist").detail("URL", bc->getURL());
            throw backup_does_not_exist();
        }

        // If logStartVersion is relative, then first do a recursive call without it to find the max log version
        // from which to resolve the relative version.
        // This could be handled more efficiently without recursion but it's tricky, this will do for now.
        if (logStartVersionOverride != invalidVersion && logStartVersionOverride < 0) {
            BackupDescription tmp = wait(bc->describeBackup(false, invalidVersion));
            logStartVersionOverride = resolveRelativeVersion(
                tmp.maxLogEnd, logStartVersionOverride, "LogStartVersionOverride", invalid_option_value());
        }

        // Get metadata versions
        state Optional<Version> metaLogBegin;
        state Optional<Version> metaLogEnd;
        state Optional<Version> metaExpiredEnd;
        state Optional<Version> metaUnreliableEnd;
        state Optional<Version> metaLogType;

        std::vector<Future<Void>> metaReads;
        metaReads.push_back(store(metaExpiredEnd, bc->expiredEndVersion().get()));
        metaReads.push_back(store(metaUnreliableEnd, bc->unreliableEndVersion().get()));
        metaReads.push_back(store(metaLogType, bc->logType().get()));

        // Only read log begin/end versions if not doing a deep scan, otherwise scan files and recalculate them.
        if (!deepScan) {
            metaReads.push_back(store(metaLogBegin, bc->logBeginVersion().get()));
            metaReads.push_back(store(metaLogEnd, bc->logEndVersion().get()));
        }

        wait(waitForAll(metaReads));

        TraceEvent("BackupContainerDescribe2")
            .detail("URL", bc->getURL())
            .detail("LogStartVersionOverride", logStartVersionOverride)
            .detail("ExpiredEndVersion", metaExpiredEnd.orDefault(invalidVersion))
            .detail("UnreliableEndVersion", metaUnreliableEnd.orDefault(invalidVersion))
            .detail("LogBeginVersion", metaLogBegin.orDefault(invalidVersion))
            .detail("LogEndVersion", metaLogEnd.orDefault(invalidVersion))
            .detail("LogType", metaLogType.orDefault(-1));

        // If the logStartVersionOverride is positive (not relative) then ensure that unreliableEndVersion is
        // equal or greater
        if (logStartVersionOverride != invalidVersion &&
            metaUnreliableEnd.orDefault(invalidVersion) < logStartVersionOverride) {
            metaUnreliableEnd = logStartVersionOverride;
        }

        // Don't use metaLogBegin or metaLogEnd if any of the following are true; the safest
        // thing to do is rescan to verify log continuity and get exact begin/end versions
        //   - either is missing
        //   - metaLogEnd <= metaLogBegin       (invalid range)
        //   - metaLogEnd < metaExpiredEnd      (log continuity exists in missing data range)
        //   - metaLogEnd < metaUnreliableEnd   (log continuity exists in incomplete data range)
        if (!metaLogBegin.present() || !metaLogEnd.present() || metaLogEnd.get() <= metaLogBegin.get() ||
            metaLogEnd.get() < metaExpiredEnd.orDefault(invalidVersion) ||
            metaLogEnd.get() < metaUnreliableEnd.orDefault(invalidVersion)) {
            TraceEvent(SevWarnAlways, "BackupContainerMetadataInvalid")
                .detail("URL", bc->getURL())
                .detail("ExpiredEndVersion", metaExpiredEnd.orDefault(invalidVersion))
                .detail("UnreliableEndVersion", metaUnreliableEnd.orDefault(invalidVersion))
                .detail("LogBeginVersion", metaLogBegin.orDefault(invalidVersion))
                .detail("LogEndVersion", metaLogEnd.orDefault(invalidVersion));

            metaLogBegin = Optional<Version>();
            metaLogEnd = Optional<Version>();
        }

        // If the unreliable end version is not set or is < expiredEndVersion then increase it to expiredEndVersion.
        // Describe does not update unreliableEnd in the backup metadata for safety reasons as there is no
        // compare-and-set operation to atomically change it and an expire process could be advancing it
        // simultaneously.
        if (!metaUnreliableEnd.present() || metaUnreliableEnd.get() < metaExpiredEnd.orDefault(0))
            metaUnreliableEnd = metaExpiredEnd;

        desc.unreliableEndVersion = metaUnreliableEnd;
        desc.expiredEndVersion = metaExpiredEnd;

        // Start scanning at the end of the unreliable version range, which is the version before which data is
        // likely missing because an expire process has operated on that range.
        state Version scanBegin = desc.unreliableEndVersion.orDefault(0);
        state Version scanEnd = std::numeric_limits<Version>::max();

        // Use the known log range if present.
        // Logs are assumed to be contiguous between metaLogBegin and metaLogEnd, so initialize desc accordingly.
        if (metaLogBegin.present() && metaLogEnd.present()) {
            // minLogBegin is the greater of the log begin metadata OR the unreliable end version since we can't
            // count on log file presence before that version.
            desc.minLogBegin = std::max(metaLogBegin.get(), desc.unreliableEndVersion.orDefault(0));

            // Set the maximum known end version of a log file, so far, which is also the assumed contiguous log
            // file end version
            desc.maxLogEnd = metaLogEnd.get();
            desc.contiguousLogEnd = desc.maxLogEnd;

            // Advance scanBegin to the contiguous log end version
            scanBegin = desc.contiguousLogEnd.get();
        }

        state std::vector<LogFile> logs;
        state std::vector<LogFile> plogs;
        TraceEvent("BackupContainerListFiles").detail("URL", bc->getURL());

        wait(store(logs, bc->listLogFiles(scanBegin, scanEnd, false)) &&
             store(plogs, bc->listLogFiles(scanBegin, scanEnd, true)) &&
             store(desc.snapshots, bc->listKeyspaceSnapshots()));

        TraceEvent("BackupContainerListFiles")
            .detail("URL", bc->getURL())
            .detail("LogFiles", logs.size())
            .detail("PLogsFiles", plogs.size())
            .detail("Snapshots", desc.snapshots.size());

        if (plogs.size() > 0) {
            desc.partitioned = true;
            logs.swap(plogs);
        } else {
            desc.partitioned =
                metaLogType.present() && metaLogType.get() == BackupContainerFileSystemImpl::PARTITIONED_MUTATION_LOG;
        }

        // List logs in version order so log continuity can be analyzed
        std::sort(logs.begin(), logs.end());

        // Find out contiguous log end version
        if (!logs.empty()) {
            desc.maxLogEnd = logs.rbegin()->endVersion;
            // If we didn't get log versions above then seed them using the first log file
            if (!desc.contiguousLogEnd.present()) {
                desc.minLogBegin = logs.begin()->beginVersion;
                if (desc.partitioned) {
                    // Cannot use the first file's end version, which may not be contiguous
                    // for other partitions. Set to its beginVersion to be safe.
                    desc.contiguousLogEnd = logs.begin()->beginVersion;
                } else {
                    desc.contiguousLogEnd = logs.begin()->endVersion;
                }
            }

            if (desc.partitioned) {
                updatePartitionedLogsContinuousEnd(&desc, logs, scanBegin, scanEnd);
            } else {
                Version& end = desc.contiguousLogEnd.get();
                computeRestoreEndVersion(logs, nullptr, &end, std::numeric_limits<Version>::max());
            }
        }

        // Only update stored contiguous log begin and end versions if we did NOT use a log start override.
        // Otherwise, a series of describe operations can result in a version range which is actually missing data.
        if (logStartVersionOverride == invalidVersion) {
            // If the log metadata begin/end versions are missing (or treated as missing due to invalidity) or
            // differ from the newly calculated values for minLogBegin and contiguousLogEnd, respectively,
            // then attempt to update the metadata in the backup container but ignore errors in case the
            // container is not writeable.
            try {
                state Future<Void> updates = Void();

                if (desc.minLogBegin.present() && metaLogBegin != desc.minLogBegin) {
                    updates = updates && bc->logBeginVersion().set(desc.minLogBegin.get());
                }

                if (desc.contiguousLogEnd.present() && metaLogEnd != desc.contiguousLogEnd) {
                    updates = updates && bc->logEndVersion().set(desc.contiguousLogEnd.get());
                }

                if (!metaLogType.present()) {
                    updates =
                        updates && bc->logType().set(desc.partitioned
                                                         ? BackupContainerFileSystemImpl::PARTITIONED_MUTATION_LOG
                                                         : BackupContainerFileSystemImpl::NON_PARTITIONED_MUTATION_LOG);
                }

                wait(updates);
            } catch (Error& e) {
                if (e.code() == error_code_actor_cancelled)
                    throw;
                TraceEvent(SevWarn, "BackupContainerMetadataUpdateFailure").error(e).detail("URL", bc->getURL());
            }
        }

        for (auto& s : desc.snapshots) {
            // Calculate restorability of each snapshot. Assume true, then try to prove false
            s.restorable = true;
            // If this is not a single-version snapshot then see if the available contiguous logs cover its range
            if (s.beginVersion != s.endVersion) {
                if (!desc.minLogBegin.present() || desc.minLogBegin.get() > s.beginVersion)
                    s.restorable = false;
                if (!desc.contiguousLogEnd.present() || desc.contiguousLogEnd.get() <= s.endVersion)
                    s.restorable = false;
            }

            desc.snapshotBytes += s.totalSize;

            // If the snapshot is at a single version then it requires no logs. Update min and max restorable.
            // TODO: Somehow check / report if the restorable range is not or may not be contiguous.
            if (s.beginVersion == s.endVersion) {
                if (!desc.minRestorableVersion.present() || s.endVersion < desc.minRestorableVersion.get())
                    desc.minRestorableVersion = s.endVersion;

                if (!desc.maxRestorableVersion.present() || s.endVersion > desc.maxRestorableVersion.get())
                    desc.maxRestorableVersion = s.endVersion;
            }

            // If the snapshot is covered by the contiguous log chain then update min/max restorable.
            if (desc.minLogBegin.present() && s.beginVersion >= desc.minLogBegin.get() &&
                s.endVersion < desc.contiguousLogEnd.get()) {
                if (!desc.minRestorableVersion.present() || s.endVersion < desc.minRestorableVersion.get())
                    desc.minRestorableVersion = s.endVersion;

                if (!desc.maxRestorableVersion.present() ||
                    (desc.contiguousLogEnd.get() - 1) > desc.maxRestorableVersion.get())
                    desc.maxRestorableVersion = desc.contiguousLogEnd.get() - 1;
            }
        }

        return desc;
    }
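
    // Delete backup data up to, but not including, expireEndVersion. Refuses the operation (unless "force" is
    // set) when no snapshot restorable at or before restorableBeginVersion would survive it.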
    ACTOR static Future<Void> expireData(Reference<BackupContainerFileSystem> bc,
                                         Version expireEndVersion,
                                         bool force,
                                         IBackupContainer::ExpireProgress* progress,
                                         Version restorableBeginVersion) {
        if (progress != nullptr) {
            progress->step = "Describing backup";
            progress->total = 0;
        }

        TraceEvent("BackupContainerFileSystemExpire1")
            .detail("URL", bc->getURL())
            .detail("ExpireEndVersion", expireEndVersion)
            .detail("RestorableBeginVersion", restorableBeginVersion);

        // Get the backup description.
        state BackupDescription desc = wait(bc->describeBackup(false, expireEndVersion));

        // Resolve relative versions using max log version
        expireEndVersion =
            resolveRelativeVersion(desc.maxLogEnd, expireEndVersion, "ExpireEndVersion", invalid_option_value());
        restorableBeginVersion = resolveRelativeVersion(
            desc.maxLogEnd, restorableBeginVersion, "RestorableBeginVersion", invalid_option_value());

        // It would be impossible to have restorability to any version < expireEndVersion after expiring to that
        // version
        if (restorableBeginVersion < expireEndVersion)
            throw backup_cannot_expire();

        // If the expire request is to a version at or before the previous version to which data was already
        // deleted then do nothing and just return
        if (expireEndVersion <= desc.expiredEndVersion.orDefault(invalidVersion)) {
            return Void();
        }

        // Assume force is needed, then try to prove otherwise.
        // Force is required if there is not a restorable snapshot which both
        //   - begins at or after expireEndVersion
        //   - ends at or before restorableBeginVersion
        state bool forceNeeded = true;
        for (KeyspaceSnapshotFile& s : desc.snapshots) {
            if (s.restorable.orDefault(false) && s.beginVersion >= expireEndVersion &&
                s.endVersion <= restorableBeginVersion) {
                forceNeeded = false;
                break;
            }
        }

        // If force is needed but not passed then refuse to expire anything.
        // Note that it is possible for there to be no actual files in the backup prior to expireEndVersion,
        // if they were externally deleted or an expire operation deleted them but was terminated before
        // updating expireEndVersion
        if (forceNeeded && !force)
            throw backup_cannot_expire();

        // Start scan for files to delete at the last completed expire operation's end or 0.
        state Version scanBegin = desc.expiredEndVersion.orDefault(0);

        TraceEvent("BackupContainerFileSystemExpire2")
            .detail("URL", bc->getURL())
            .detail("ExpireEndVersion", expireEndVersion)
            .detail("RestorableBeginVersion", restorableBeginVersion)
            .detail("ScanBeginVersion", scanBegin);

        state std::vector<LogFile> logs;
        state std::vector<LogFile> pLogs; // partitioned mutation logs
        state std::vector<RangeFile> ranges;

        if (progress != nullptr) {
            progress->step = "Listing files";
        }
        // Get log files or range files that contain any data at or before expireEndVersion
        wait(store(logs, bc->listLogFiles(scanBegin, expireEndVersion - 1, false)) &&
             store(pLogs, bc->listLogFiles(scanBegin, expireEndVersion - 1, true)) &&
             store(ranges, bc->listRangeFiles(scanBegin, expireEndVersion - 1)));
        logs.insert(logs.end(), std::make_move_iterator(pLogs.begin()), std::make_move_iterator(pLogs.end()));

        // The new logBeginVersion will be taken from the last log file, if there is one
        state Optional<Version> newLogBeginVersion;
        if (!logs.empty()) {
            // Linear scan the unsorted logs to find the latest one in sorted order
            LogFile& last = *std::max_element(logs.begin(), logs.end());

            // If the last log ends at expireEndVersion then that will be the next log begin
            if (last.endVersion == expireEndVersion) {
                newLogBeginVersion = expireEndVersion;
            } else {
                // If the last log overlaps the expiredEnd then use the log's begin version and move the
                // expiredEnd back to match it and keep the last log file
                if (last.endVersion > expireEndVersion) {
                    newLogBeginVersion = last.beginVersion;

                    // Instead of modifying this potentially very large vector, just clear LogFile
                    last = LogFile();

                    expireEndVersion = newLogBeginVersion.get();
                }
            }
        }

        // Make a list of files to delete
        state std::vector<std::string> toDelete;

        // Move filenames out of vector then destroy it to save memory
        for (auto const& f : logs) {
            // We may have cleared the last log file earlier so skip any empty filenames
            if (!f.fileName.empty()) {
                toDelete.push_back(std::move(f.fileName));
            }
        }
        logs.clear();

        // Move filenames out of vector then destroy it to save memory
        for (auto const& f : ranges) {
            // The file version must be checked here again because it is likely that expireEndVersion is in the
            // middle of a log file, in which case after the log and range file listings are done (using the
            // original expireEndVersion) the expireEndVersion will be moved back slightly to the begin version
            // of the last log file found (which is also the first log to not be deleted)
            if (f.version < expireEndVersion) {
                toDelete.push_back(std::move(f.fileName));
            }
        }
        ranges.clear();

        for (auto const& f : desc.snapshots) {
            if (f.endVersion < expireEndVersion)
                toDelete.push_back(std::move(f.fileName));
        }
        desc = BackupDescription();

        // We are about to start deleting files, at which point all data prior to expireEndVersion is considered
        // 'unreliable' as some or all of it will be missing. So before deleting anything, read
        // unreliableEndVersion (don't use cached value in desc) and update its value if it is missing or
        // < expireEndVersion
        if (progress != nullptr) {
            progress->step = "Initial metadata update";
        }
        Optional<Version> metaUnreliableEnd = wait(bc->unreliableEndVersion().get());
        if (metaUnreliableEnd.orDefault(0) < expireEndVersion) {
            wait(bc->unreliableEndVersion().set(expireEndVersion));
        }

        if (progress != nullptr) {
            progress->step = "Deleting files";
            progress->total = toDelete.size();
            progress->done = 0;
        }

        // Delete files, but limit parallelism because the file list could use a lot of memory and the
        // corresponding delete actor states would use even more if they all existed at the same time.
        state std::list<Future<Void>> deleteFutures;

        while (!toDelete.empty() || !deleteFutures.empty()) {

            // While there are files to delete and budget in the deleteFutures list, start a delete
            while (!toDelete.empty() && deleteFutures.size() < CLIENT_KNOBS->BACKUP_CONCURRENT_DELETES) {
                deleteFutures.push_back(bc->deleteFile(toDelete.back()));
                toDelete.pop_back();
            }

            // Wait for deletes to finish until there are only targetFuturesSize remaining.
            // If there are no files left to start then this value is 0, otherwise it is one less
            // than the delete concurrency limit.
            state int targetFuturesSize = toDelete.empty() ? 0 : (CLIENT_KNOBS->BACKUP_CONCURRENT_DELETES - 1);

            while (deleteFutures.size() > targetFuturesSize) {
                wait(deleteFutures.front());
                if (progress != nullptr) {
                    ++progress->done;
                }
                deleteFutures.pop_front();
            }
        }

        if (progress != nullptr) {
            progress->step = "Final metadata update";
            progress->total = 0;
        }
        // Update the expiredEndVersion metadata to indicate that everything prior to that version has been
        // successfully deleted if the current version is lower or missing
        Optional<Version> metaExpiredEnd = wait(bc->expiredEndVersion().get());
        if (metaExpiredEnd.orDefault(0) < expireEndVersion) {
            wait(bc->expiredEndVersion().set(expireEndVersion));
        }

        return Void();
    }

    // Returns true if logs are continuous in the range [begin, end].
    // "files" should be pre-sorted according to version order.
    static bool isPartitionedLogsContinuous(const std::vector<LogFile>& files, Version begin, Version end) {
        std::map<int, std::vector<int>> tagIndices; // tagId -> indices in files
        for (int i = 0; i < files.size(); i++) {
            ASSERT(files[i].tagId >= 0 && files[i].tagId < files[i].totalTags);
            auto& indices = tagIndices[files[i].tagId];
            indices.push_back(i);
        }

        // check partition 0 is continuous and create a map of ranges to tags
        std::map<std::pair<Version, Version>, int> tags; // range [begin, end] -> tags
        if (!isContinuous(files, tagIndices[0], begin, end, &tags)) {
            TraceEvent(SevWarn, "BackupFileNotContinuous")
                .detail("Partition", 0)
                .detail("RangeBegin", begin)
                .detail("RangeEnd", end);
            return false;
        }

        // for each range in tags, check all tags from 1 are continuous
        for (const auto& [beginEnd, count] : tags) {
            for (int i = 1; i < count; i++) {
                if (!isContinuous(files, tagIndices[i], beginEnd.first, std::min(beginEnd.second - 1, end), nullptr)) {
                    TraceEvent(SevWarn, "BackupFileNotContinuous")
                        .detail("Partition", i)
                        .detail("RangeBegin", beginEnd.first)
                        .detail("RangeEnd", beginEnd.second);
                    return false;
                }
            }
        }
        return true;
    }

    // Returns log files that are not duplicated, or a subset of another log.
    // If a log file's progress is not saved, a new log file will be generated
    // with the same begin version. So we can have a file whose contents are a subset
    // of another log file's.
    // PRE-CONDITION: logs are already sorted by (tagId, beginVersion, endVersion).
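    //
    // Worked example (illustrative): for one tag, files covering [1, 5), [1, 10), and [10, 20) reduce to
    // [1, 10) and [10, 20), because [1, 5) is a subset of [1, 10).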
    static std::vector<LogFile> filterDuplicates(const std::vector<LogFile>& logs) {
        std::vector<LogFile> filtered;
        int i = 0;
        for (int j = 1; j < logs.size(); j++) {
            if (logs[j].isSubset(logs[i])) {
                ASSERT_LE(logs[j].fileSize, logs[i].fileSize);
                continue;
            }

            if (!logs[i].isSubset(logs[j])) {
                filtered.push_back(logs[i]);
            }
            i = j;
        }
        if (i < logs.size())
            filtered.push_back(logs[i]);
        return filtered;
    }

    static Optional<RestorableFileSet> getRestoreSetFromLogs(const std::vector<LogFile>& logs,
                                                             Version targetVersion,
                                                             RestorableFileSet restorable) {
        Version end = logs.begin()->endVersion;
        computeRestoreEndVersion(logs, &restorable.logs, &end, targetVersion);
        if (end >= targetVersion) {
            restorable.continuousBeginVersion = logs.begin()->beginVersion;
            restorable.continuousEndVersion = end;
            return Optional<RestorableFileSet>(restorable);
        }
        return Optional<RestorableFileSet>();
    }

    // Get a set of files that can restore the given "keyRangesFilter" to the "targetVersion".
    // If "keyRangesFilter" is empty, the file set will cover all key ranges present in the backup.
    // It's generally a good idea to specify "keyRangesFilter" to reduce the number of files and
    // therefore the restore time.
    //
    // If "logsOnly" is true, then only log files are returned and "keyRangesFilter" is ignored,
    // because the log can contain mutations of the whole key space, unlike range files, each of
    // which is limited to a smaller key range.
    ACTOR static Future<Optional<RestorableFileSet>> getRestoreSet(Reference<BackupContainerFileSystem> bc,
                                                                   Version targetVersion,
                                                                   VectorRef<KeyRangeRef> keyRangesFilter,
                                                                   bool logsOnly = false,
                                                                   Version beginVersion = invalidVersion) {
        for (const auto& range : keyRangesFilter) {
            TraceEvent("BackupContainerGetRestoreSet").detail("RangeFilter", printable(range));
        }

        if (logsOnly) {
            state RestorableFileSet restorableSet;
            restorableSet.targetVersion = targetVersion;
            state std::vector<LogFile> logFiles;
            Version begin = beginVersion == invalidVersion ? 0 : beginVersion;
            wait(store(logFiles, bc->listLogFiles(begin, targetVersion, false)));
            // List logs in version order so log continuity can be analyzed
            std::sort(logFiles.begin(), logFiles.end());
            if (!logFiles.empty()) {
                return getRestoreSetFromLogs(logFiles, targetVersion, restorableSet);
            }
        }

        // Find the most recent keyrange snapshot through which we can restore filtered key ranges into
        // targetVersion.
        state std::vector<KeyspaceSnapshotFile> snapshots = wait(bc->listKeyspaceSnapshots());
        state int i = snapshots.size() - 1;
        for (; i >= 0; i--) {
            // The smallest version of filtered range files >= snapshot beginVersion > targetVersion
            if (targetVersion >= 0 && snapshots[i].beginVersion > targetVersion) {
                continue;
            }

            state RestorableFileSet restorable;
            state Version minKeyRangeVersion = MAX_VERSION;
            state Version maxKeyRangeVersion = -1;

            std::pair<std::vector<RangeFile>, std::map<std::string, KeyRange>> results =
                wait(bc->readKeyspaceSnapshot(snapshots[i]));

            // If there is no key ranges filter for the restore OR if the snapshot contains no per-file key range
            // info then return all of the range files
            if (keyRangesFilter.empty() || results.second.empty()) {
                restorable.ranges = std::move(results.first);
                restorable.keyRanges = std::move(results.second);
                minKeyRangeVersion = snapshots[i].beginVersion;
                maxKeyRangeVersion = snapshots[i].endVersion;
            } else {
                for (const auto& rangeFile : results.first) {
                    const auto& keyRange = results.second.at(rangeFile.fileName);
                    if (keyRange.intersects(keyRangesFilter)) {
                        restorable.ranges.push_back(rangeFile);
                        restorable.keyRanges[rangeFile.fileName] = keyRange;
                        minKeyRangeVersion = std::min(minKeyRangeVersion, rangeFile.version);
                        maxKeyRangeVersion = std::max(maxKeyRangeVersion, rangeFile.version);
                    }
                }
                // No range file matches 'keyRangesFilter'.
                if (restorable.ranges.empty()) {
                    throw backup_not_overlapped_with_keys_filter();
                }
            }
            // 'latestVersion' represents using the minimum restorable version in a snapshot.
            restorable.targetVersion = targetVersion == latestVersion ? maxKeyRangeVersion : targetVersion;
            // Any version < maxKeyRangeVersion is not restorable.
            if (restorable.targetVersion < maxKeyRangeVersion)
                continue;

            restorable.snapshot = snapshots[i];

            // No logs needed if there is a complete filtered key space snapshot at the target version.
            if (minKeyRangeVersion == maxKeyRangeVersion && maxKeyRangeVersion == restorable.targetVersion) {
                restorable.continuousBeginVersion = restorable.continuousEndVersion = invalidVersion;
                TraceEvent("BackupContainerGetRestorableFilesWithoutLogs")
                    .detail("KeyRangeVersion", restorable.targetVersion)
                    .detail("NumberOfRangeFiles", restorable.ranges.size())
                    .detail("KeyRangesFilter", printable(keyRangesFilter));
                return Optional<RestorableFileSet>(restorable);
            }

            // FIXME: check if there are tagged logs. for each tag, there is no version gap.
            state std::vector<LogFile> logs;
            state std::vector<LogFile> plogs;
            wait(store(logs, bc->listLogFiles(minKeyRangeVersion, restorable.targetVersion, false)) &&
                 store(plogs, bc->listLogFiles(minKeyRangeVersion, restorable.targetVersion, true)));

            if (plogs.size() > 0) {
                logs.swap(plogs);
                // sort by tag ID so that filterDuplicates works.
                std::sort(logs.begin(), logs.end(), [](const LogFile& a, const LogFile& b) {
                    return std::tie(a.tagId, a.beginVersion, a.endVersion) <
                           std::tie(b.tagId, b.beginVersion, b.endVersion);
                });

                // Remove duplicated log files that can happen for old epochs.
                std::vector<LogFile> filtered = filterDuplicates(logs);
                restorable.logs.swap(filtered);
                // sort by version order again for continuous analysis
                std::sort(restorable.logs.begin(), restorable.logs.end());
                if (isPartitionedLogsContinuous(restorable.logs, minKeyRangeVersion, restorable.targetVersion)) {
                    restorable.continuousBeginVersion = minKeyRangeVersion;
                    restorable.continuousEndVersion = restorable.targetVersion + 1; // not inclusive
                    return Optional<RestorableFileSet>(restorable);
                }
                return Optional<RestorableFileSet>();
            }

            // List logs in version order so log continuity can be analyzed
            std::sort(logs.begin(), logs.end());
            // If there are logs and the first one starts at or before the keyrange's snapshot begin version,
            // then it is a valid restore set and we can proceed
            if (!logs.empty() && logs.front().beginVersion <= minKeyRangeVersion) {
                return getRestoreSetFromLogs(logs, targetVersion, restorable);
            }
        }
        return Optional<RestorableFileSet>();
    }
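
    // Illustration of the folder math below: for v = 100,000,000,000 and smallestBucket = 8, the 19-digit
    // zero-padded form "0000000100000000000" is truncated to "00000001000" and split as "0000000/1000".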
    static std::string versionFolderString(Version v, int smallestBucket) {
        ASSERT_LT(smallestBucket, 14);
        // Get a 0-padded fixed size representation of v
        std::string vFixedPrecision = format("%019lld", v);
        ASSERT_EQ(vFixedPrecision.size(), 19);
        // Truncate smallestBucket from the fixed length representation
        vFixedPrecision.resize(vFixedPrecision.size() - smallestBucket);

        // Split the remaining digits with a '/' 4 places from the right
        vFixedPrecision.insert(vFixedPrecision.size() - 4, 1, '/');

        return vFixedPrecision;
    }

    // This is useful for comparing version folder strings regardless of where their "/" dividers are, as it is
    // possible that the division points would change in the future.
    static std::string cleanFolderString(std::string f) {
        f.erase(std::remove(f.begin(), f.end(), '/'), f.end());
        return f;
    }

    // The innermost folder covers 100 seconds (1e8 versions). During a full speed backup it is possible, though
    // very unlikely, to write about 10,000 snapshot range files during that time.
    static std::string old_rangeVersionFolderString(Version v) {
        return format("ranges/%s/", versionFolderString(v, 8).c_str());
    }

    // Get the root folder for a snapshot's data based on its begin version
    static std::string snapshotFolderString(Version snapshotBeginVersion) {
        return format("kvranges/snapshot.%018" PRId64, snapshotBeginVersion);
    }

    // Extract the snapshot begin version from a path
    static Version extractSnapshotBeginVersion(const std::string& path) {
        Version snapshotBeginVersion;
        if (sscanf(path.c_str(), "kvranges/snapshot.%018" SCNd64, &snapshotBeginVersion) == 1) {
            return snapshotBeginVersion;
        }
        return invalidVersion;
    }

    // The innermost folder covers 100,000 seconds (1e11 versions) which is 5,000 mutation log files at current
    // settings.
    static std::string logVersionFolderString(Version v, bool partitioned) {
        return format("%s/%s/", (partitioned ? "plogs" : "logs"), versionFolderString(v, 11).c_str());
    }
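
    // Parse a log file name. Two formats are recognized, non-partitioned and partitioned (the latter carries
    // "<tagId>-of-<totalTags>"), e.g. "log,1000,2000,fdb1,1048576" and "log,1000,2000,fdb1,1-of-2,1048576"
    // (names illustrative).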
    static bool pathToLogFile(LogFile& out, const std::string& path, int64_t size) {
        std::string name = fileNameOnly(path);
        LogFile f;
        f.fileName = path;
        f.fileSize = size;
        int len;
        if (sscanf(name.c_str(),
                   "log,%" SCNd64 ",%" SCNd64 ",%*[^,],%u%n",
                   &f.beginVersion,
                   &f.endVersion,
                   &f.blockSize,
                   &len) == 3 &&
            len == name.size()) {
            out = f;
            return true;
        } else if (sscanf(name.c_str(),
                          "log,%" SCNd64 ",%" SCNd64 ",%*[^,],%d-of-%d,%u%n",
                          &f.beginVersion,
                          &f.endVersion,
                          &f.tagId,
                          &f.totalTags,
                          &f.blockSize,
                          &len) == 5 &&
                   len == name.size() && f.tagId >= 0) {
            out = f;
            return true;
        }
        return false;
    }

    static bool pathToKeyspaceSnapshotFile(KeyspaceSnapshotFile& out, const std::string& path) {
        std::string name = fileNameOnly(path);
        KeyspaceSnapshotFile f;
        f.fileName = path;
        int len;
        if (sscanf(name.c_str(),
                   "snapshot,%" SCNd64 ",%" SCNd64 ",%" SCNd64 "%n",
                   &f.beginVersion,
                   &f.endVersion,
                   &f.totalSize,
                   &len) == 3 &&
            len == name.size()) {
            out = f;
            return true;
        }
        return false;
    }

    // Fallback to the existing write API if the underlying blob store doesn't support an efficient
    // writeEntireFile
    ACTOR static Future<Void> writeEntireFileFallback(Reference<BackupContainerFileSystem> bc,
                                                      std::string fileName,
                                                      std::string fileContents) {
        state Reference<IBackupFile> objectFile = wait(bc->writeFile(fileName));
        wait(objectFile->append(&fileContents[0], fileContents.size()));
        wait(objectFile->finish());
        return Void();
    }

    ACTOR static Future<Void> createTestEncryptionKeyFile(std::string filename) {
        state Reference<IAsyncFile> keyFile = wait(IAsyncFileSystem::filesystem()->open(
            filename,
            IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_READWRITE | IAsyncFile::OPEN_CREATE,
            0600));
        StreamCipherKey testKey(AES_256_KEY_LENGTH);
        testKey.initializeRandomTestKey();
        keyFile->write(testKey.data(), testKey.size(), 0);
        wait(keyFile->sync());
        return Void();
    }

    ACTOR static Future<Void> readEncryptionKey(std::string encryptionKeyFileName) {
        state Reference<IAsyncFile> keyFile;
        state StreamCipherKey const* cipherKey = StreamCipherKey::getGlobalCipherKey();
        try {
            Reference<IAsyncFile> _keyFile =
                wait(IAsyncFileSystem::filesystem()->open(encryptionKeyFileName, 0x0, 0400));
            keyFile = _keyFile;
        } catch (Error& e) {
            TraceEvent(SevWarnAlways, "FailedToOpenEncryptionKeyFile")
                .error(e)
                .detail("FileName", encryptionKeyFileName);
            throw e;
        }
        int bytesRead = wait(keyFile->read(cipherKey->data(), cipherKey->size(), 0));
        if (bytesRead != cipherKey->size()) {
            TraceEvent(SevWarnAlways, "InvalidEncryptionKeyFileSize")
                .detail("ExpectedSize", cipherKey->size())
                .detail("ActualSize", bytesRead);
            throw invalid_encryption_key_file();
        }
        ASSERT_EQ(bytesRead, cipherKey->size());
        return Void();
    }

}; // class BackupContainerFileSystemImpl

Future<Reference<IBackupFile>> BackupContainerFileSystem::writeLogFile(Version beginVersion,
                                                                       Version endVersion,
                                                                       int blockSize) {
    return writeFile(BackupContainerFileSystemImpl::logVersionFolderString(beginVersion, false) +
                     format("log,%lld,%lld,%s,%d",
                            beginVersion,
                            endVersion,
                            deterministicRandom()->randomUniqueID().toString().c_str(),
                            blockSize));
}

Future<Reference<IBackupFile>> BackupContainerFileSystem::writeTaggedLogFile(Version beginVersion,
                                                                             Version endVersion,
                                                                             int blockSize,
                                                                             uint16_t tagId,
                                                                             int totalTags) {
    return writeFile(BackupContainerFileSystemImpl::logVersionFolderString(beginVersion, true) +
                     format("log,%lld,%lld,%s,%d-of-%d,%d",
                            beginVersion,
                            endVersion,
                            deterministicRandom()->randomUniqueID().toString().c_str(),
                            tagId,
                            totalTags,
                            blockSize));
}

Future<Reference<IBackupFile>> BackupContainerFileSystem::writeRangeFile(Version snapshotBeginVersion,
                                                                         int snapshotFileCount,
                                                                         Version fileVersion,
                                                                         int blockSize) {
    std::string fileName = format(
        "range,%" PRId64 ",%s,%d", fileVersion, deterministicRandom()->randomUniqueID().toString().c_str(), blockSize);

    // In order to test backward compatibility in simulation, sometimes write to the old path format
    if (g_network->isSimulated() && deterministicRandom()->coinflip()) {
        return writeFile(BackupContainerFileSystemImpl::old_rangeVersionFolderString(fileVersion) + fileName);
    }

    return writeFile(BackupContainerFileSystemImpl::snapshotFolderString(snapshotBeginVersion) +
                     format("/%d/", snapshotFileCount / (BUGGIFY ? 1 : 5000)) + fileName);
}
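
// For example: with the new scheme a range file lands at a path like
// "kvranges/snapshot.<18-digit begin version>/<fileCount/5000>/range,<version>,<UID>,<blockSize>", grouping
// files into subfolders of at most 5,000 entries (or 1 under BUGGIFY, to exercise the grouping logic).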

Future<std::pair<std::vector<RangeFile>, std::map<std::string, KeyRange>>>
BackupContainerFileSystem::readKeyspaceSnapshot(KeyspaceSnapshotFile snapshot) {
    return BackupContainerFileSystemImpl::readKeyspaceSnapshot(Reference<BackupContainerFileSystem>::addRef(this),
                                                               snapshot);
}

Future<Void> BackupContainerFileSystem::writeKeyspaceSnapshotFile(const std::vector<std::string>& fileNames,
                                                                  const std::vector<std::pair<Key, Key>>& beginEndKeys,
                                                                  int64_t totalBytes,
                                                                  IncludeKeyRangeMap includeKeyRangeMap) {
    return BackupContainerFileSystemImpl::writeKeyspaceSnapshotFile(
        Reference<BackupContainerFileSystem>::addRef(this), fileNames, beginEndKeys, totalBytes, includeKeyRangeMap);
}

Future<std::vector<LogFile>> BackupContainerFileSystem::listLogFiles(Version beginVersion,
                                                                     Version targetVersion,
                                                                     bool partitioned) {
    // The first relevant log file could have a begin version less than beginVersion based on the knobs which
    // determine log file range size, so start at an earlier version adjusted by how many versions a file could
    // contain.
    //
    // Get the cleaned (without slashes) first and last folders that could contain relevant results.
    std::string firstPath =
        BackupContainerFileSystemImpl::cleanFolderString(BackupContainerFileSystemImpl::logVersionFolderString(
            std::max<Version>(0,
                              beginVersion - CLIENT_KNOBS->BACKUP_MAX_LOG_RANGES * CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE),
            partitioned));
    std::string lastPath = BackupContainerFileSystemImpl::cleanFolderString(
        BackupContainerFileSystemImpl::logVersionFolderString(targetVersion, partitioned));

    std::function<bool(std::string const&)> pathFilter = [=](const std::string& folderPath) {
        // Remove slashes in the given folder path so that the '/' positions in the version folder string do not
        // matter
        std::string cleaned = BackupContainerFileSystemImpl::cleanFolderString(folderPath);
        return StringRef(firstPath).startsWith(cleaned) || StringRef(lastPath).startsWith(cleaned) ||
               (cleaned > firstPath && cleaned < lastPath);
    };

    return map(listFiles((partitioned ? "plogs/" : "logs/"), pathFilter), [=](const FilesAndSizesT& files) {
        std::vector<LogFile> results;
        LogFile lf;
        for (auto& f : files) {
            if (BackupContainerFileSystemImpl::pathToLogFile(lf, f.first, f.second) && lf.endVersion > beginVersion &&
                lf.beginVersion <= targetVersion)
                results.push_back(lf);
        }
        return results;
    });
}

Future<std::vector<RangeFile>> BackupContainerFileSystem::old_listRangeFiles(Version beginVersion, Version endVersion) {
    // Get the cleaned (without slashes) first and last folders that could contain relevant results.
    std::string firstPath = BackupContainerFileSystemImpl::cleanFolderString(
        BackupContainerFileSystemImpl::old_rangeVersionFolderString(beginVersion));
    std::string lastPath = BackupContainerFileSystemImpl::cleanFolderString(
        BackupContainerFileSystemImpl::old_rangeVersionFolderString(endVersion));

    std::function<bool(std::string const&)> pathFilter = [=](const std::string& folderPath) {
        // Remove slashes in the given folder path so that the '/' positions in the version folder string do not
        // matter
        std::string cleaned = BackupContainerFileSystemImpl::cleanFolderString(folderPath);

        return StringRef(firstPath).startsWith(cleaned) || StringRef(lastPath).startsWith(cleaned) ||
               (cleaned > firstPath && cleaned < lastPath);
    };

    return map(listFiles("ranges/", pathFilter), [=](const FilesAndSizesT& files) {
        std::vector<RangeFile> results;
        RangeFile rf;
        for (auto& f : files) {
            if (BackupContainerFileSystemImpl::pathToRangeFile(rf, f.first, f.second) && rf.version >= beginVersion &&
                rf.version <= endVersion)
                results.push_back(rf);
        }
        return results;
    });
}

Future<std::vector<RangeFile>> BackupContainerFileSystem::listRangeFiles(Version beginVersion, Version endVersion) {
    // Until the old folder scheme is no longer supported, read files stored using old folder scheme
    Future<std::vector<RangeFile>> oldFiles = old_listRangeFiles(beginVersion, endVersion);

    // Define filter function (for listFiles() implementations that use it) to reject any folder
    // starting after endVersion
    std::function<bool(std::string const&)> pathFilter = [=](std::string const& path) {
        return BackupContainerFileSystemImpl::extractSnapshotBeginVersion(path) <= endVersion;
    };

    Future<std::vector<RangeFile>> newFiles = map(listFiles("kvranges/", pathFilter), [=](const FilesAndSizesT& files) {
        std::vector<RangeFile> results;
        RangeFile rf;
        for (auto& f : files) {
            if (BackupContainerFileSystemImpl::pathToRangeFile(rf, f.first, f.second) && rf.version >= beginVersion &&
                rf.version <= endVersion)
                results.push_back(rf);
        }
        return results;
    });

    return map(success(oldFiles) && success(newFiles), [=](Void _) {
        std::vector<RangeFile> results = std::move(newFiles.get());
        std::vector<RangeFile> oldResults = std::move(oldFiles.get());
        results.insert(
            results.end(), std::make_move_iterator(oldResults.begin()), std::make_move_iterator(oldResults.end()));
        return results;
    });
}

Future<std::vector<KeyspaceSnapshotFile>> BackupContainerFileSystem::listKeyspaceSnapshots(Version begin, Version end) {
    return map(listFiles("snapshots/"), [=](const FilesAndSizesT& files) {
        std::vector<KeyspaceSnapshotFile> results;
        KeyspaceSnapshotFile sf;
        for (auto& f : files) {
            if (BackupContainerFileSystemImpl::pathToKeyspaceSnapshotFile(sf, f.first) && sf.beginVersion < end &&
                sf.endVersion >= begin)
                results.push_back(sf);
        }
        std::sort(results.begin(), results.end());
        return results;
    });
}

Future<BackupFileList> BackupContainerFileSystem::dumpFileList(Version begin, Version end) {
    return BackupContainerFileSystemImpl::dumpFileList(Reference<BackupContainerFileSystem>::addRef(this), begin, end);
}

Future<BackupDescription> BackupContainerFileSystem::describeBackup(bool deepScan, Version logStartVersionOverride) {
    return BackupContainerFileSystemImpl::describeBackup(
        Reference<BackupContainerFileSystem>::addRef(this), deepScan, logStartVersionOverride);
}

Future<Void> BackupContainerFileSystem::expireData(Version expireEndVersion,
                                                   bool force,
                                                   ExpireProgress* progress,
                                                   Version restorableBeginVersion) {
    return BackupContainerFileSystemImpl::expireData(
        Reference<BackupContainerFileSystem>::addRef(this), expireEndVersion, force, progress, restorableBeginVersion);
}

ACTOR static Future<KeyRange> getSnapshotFileKeyRange_impl(Reference<BackupContainerFileSystem> bc,
                                                           RangeFile file,
                                                           Database cx) {
    state int readFileRetries = 0;
    state bool beginKeySet = false;
    state Key beginKey;
    state Key endKey;
    loop {
        try {
            state Reference<IAsyncFile> inFile = wait(bc->readFile(file.fileName));
            beginKeySet = false;
            state int64_t j = 0;
            for (; j < file.fileSize; j += file.blockSize) {
                int64_t len = std::min<int64_t>(file.blockSize, file.fileSize - j);
                Standalone<VectorRef<KeyValueRef>> blockData =
                    wait(fileBackup::decodeRangeFileBlock(inFile, j, len, cx));
                if (!beginKeySet) {
                    beginKey = blockData.front().key;
                    beginKeySet = true;
                }
                endKey = blockData.back().key;
            }
            break;
        } catch (Error& e) {
            if (e.code() == error_code_restore_bad_read || e.code() == error_code_restore_unsupported_file_version ||
                e.code() == error_code_restore_corrupted_data_padding) { // non-retryable error
                TraceEvent(SevError, "BackupContainerGetSnapshotFileKeyRange").error(e);
                throw;
            } else if (e.code() == error_code_http_request_failed || e.code() == error_code_connection_failed ||
                       e.code() == error_code_timed_out || e.code() == error_code_lookup_failed) {
                // blob http request failure, retry
                TraceEvent(SevWarnAlways, "BackupContainerGetSnapshotFileKeyRangeConnectionFailure")
                    .error(e)
                    .detail("Retries", ++readFileRetries);
                wait(delayJittered(0.1));
            } else {
                TraceEvent(SevError, "BackupContainerGetSnapshotFileKeyRangeUnexpectedError").error(e);
                throw;
            }
        }
    }
    return KeyRange(KeyRangeRef(beginKey, endKey));
}

ACTOR static Future<Void> writeVersionProperty(Reference<BackupContainerFileSystem> bc, std::string path, Version v) {
    try {
        state Reference<IBackupFile> f = wait(bc->writeFile(path));
        std::string s = format("%lld", v);
        wait(f->append(s.data(), s.size()));
        wait(f->finish());
        return Void();
    } catch (Error& e) {
        TraceEvent(SevWarn, "BackupContainerWritePropertyFailed")
            .error(e)
            .detail("URL", bc->getURL())
            .detail("Path", path);
        throw;
    }
}

ACTOR static Future<Optional<Version>> readVersionProperty(Reference<BackupContainerFileSystem> bc, std::string path) {
    try {
        state Reference<IAsyncFile> f = wait(bc->readFile(path));
        state int64_t size = wait(f->size());
        state std::string s;
        s.resize(size);
        int rs = wait(f->read((uint8_t*)s.data(), size, 0));
        Version v;
        int len;
        if (rs == size && sscanf(s.c_str(), "%" SCNd64 "%n", &v, &len) == 1 && len == size)
            return v;

        TraceEvent(SevWarn, "BackupContainerInvalidProperty").detail("URL", bc->getURL()).detail("Path", path);

        throw backup_invalid_info();
    } catch (Error& e) {
        if (e.code() == error_code_file_not_found)
            return Optional<Version>();

        TraceEvent(SevWarn, "BackupContainerReadPropertyFailed")
            .error(e)
            .detail("URL", bc->getURL())
            .detail("Path", path);

        throw;
    }
}
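
// For example: a version property is a small file holding the version as decimal text, so a property file
// containing the bytes "12345" reads back as Version 12345, while a missing file reads back as an empty
// Optional rather than an error.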

Future<KeyRange> BackupContainerFileSystem::getSnapshotFileKeyRange(const RangeFile& file, Database cx) {
    ASSERT(g_network->isSimulated());
    return getSnapshotFileKeyRange_impl(Reference<BackupContainerFileSystem>::addRef(this), file, cx);
}

Future<Optional<RestorableFileSet>> BackupContainerFileSystem::getRestoreSet(Version targetVersion,
                                                                             VectorRef<KeyRangeRef> keyRangesFilter,
                                                                             bool logsOnly,
                                                                             Version beginVersion) {
    return BackupContainerFileSystemImpl::getRestoreSet(
        Reference<BackupContainerFileSystem>::addRef(this), targetVersion, keyRangesFilter, logsOnly, beginVersion);
}

Future<Optional<Version>> BackupContainerFileSystem::VersionProperty::get() {
    return readVersionProperty(bc, path);
}
Future<Void> BackupContainerFileSystem::VersionProperty::set(Version v) {
    return writeVersionProperty(bc, path, v);
}
Future<Void> BackupContainerFileSystem::VersionProperty::clear() {
    return bc->deleteFile(path);
}

BackupContainerFileSystem::VersionProperty BackupContainerFileSystem::logBeginVersion() {
    return { Reference<BackupContainerFileSystem>::addRef(this), "log_begin_version" };
}
BackupContainerFileSystem::VersionProperty BackupContainerFileSystem::logEndVersion() {
    return { Reference<BackupContainerFileSystem>::addRef(this), "log_end_version" };
}
BackupContainerFileSystem::VersionProperty BackupContainerFileSystem::expiredEndVersion() {
    return { Reference<BackupContainerFileSystem>::addRef(this), "expired_end_version" };
}
BackupContainerFileSystem::VersionProperty BackupContainerFileSystem::unreliableEndVersion() {
    return { Reference<BackupContainerFileSystem>::addRef(this), "unreliable_end_version" };
}
BackupContainerFileSystem::VersionProperty BackupContainerFileSystem::logType() {
    return { Reference<BackupContainerFileSystem>::addRef(this), "mutation_log_type" };
}
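
// Illustrative usage sketch (assumes only the methods defined above): inside an actor, a caller could persist
// and read back a property, e.g.
//   wait(bc->expiredEndVersion().set(v));
//   Optional<Version> expired = wait(bc->expiredEndVersion().get());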

bool BackupContainerFileSystem::usesEncryption() const {
    return encryptionSetupFuture.isValid();
}

Future<Void> BackupContainerFileSystem::encryptionSetupComplete() const {
    return encryptionSetupFuture;
}

Future<Void> BackupContainerFileSystem::writeEntireFileFallback(const std::string& fileName,
                                                                const std::string& fileContents) {
    return BackupContainerFileSystemImpl::writeEntireFileFallback(
        Reference<BackupContainerFileSystem>::addRef(this), fileName, fileContents);
}

void BackupContainerFileSystem::setEncryptionKey(Optional<std::string> const& encryptionKeyFileName) {
    if (encryptionKeyFileName.present()) {
        encryptionSetupFuture = BackupContainerFileSystemImpl::readEncryptionKey(encryptionKeyFileName.get());
    }
}

Future<Void> BackupContainerFileSystem::createTestEncryptionKeyFile(std::string const& filename) {
    return BackupContainerFileSystemImpl::createTestEncryptionKeyFile(filename);
}

// Get a BackupContainerFileSystem based on a container URL string
// TODO: refactor to not duplicate IBackupContainer::openContainer. It's the exact same
// code but returning a different template type because you can't cast between them
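// Supported URL forms, as handled below: "file://<directory>" for a local directory container,
// "blobstore://<endpoint>/<resource>?<params>" for S3-compatible blob stores, and (when built with
// BUILD_AZURE_BACKUP) "azure://<endpoint>/<account>/<container>" for Azure blob storage.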
Reference<BackupContainerFileSystem> BackupContainerFileSystem::openContainerFS(
    const std::string& url,
    const Optional<std::string>& proxy,
    const Optional<std::string>& encryptionKeyFileName,
    bool isBackup) {
    static std::map<std::string, Reference<BackupContainerFileSystem>> m_cache;

    Reference<BackupContainerFileSystem>& r = m_cache[url];
    if (r)
        return r;

    try {
        StringRef u(url);
        if (u.startsWith("file://"_sr)) {
            r = makeReference<BackupContainerLocalDirectory>(url, encryptionKeyFileName);
        } else if (u.startsWith("blobstore://"_sr)) {
            std::string resource;

            // The URL parameters contain blobstore endpoint tunables as well as possible backup-specific options.
            S3BlobStoreEndpoint::ParametersT backupParams;
            Reference<S3BlobStoreEndpoint> bstore =
                S3BlobStoreEndpoint::fromString(url, proxy, &resource, &lastOpenError, &backupParams);

            if (resource.empty())
                throw backup_invalid_url();
            for (auto c : resource)
                if (!isalnum(c) && c != '_' && c != '-' && c != '.' && c != '/')
                    throw backup_invalid_url();
            r = makeReference<BackupContainerS3BlobStore>(
                bstore, resource, backupParams, encryptionKeyFileName, isBackup);
        }
#ifdef BUILD_AZURE_BACKUP
        else if (u.startsWith("azure://"_sr)) {
            u.eat("azure://"_sr);
            auto address = u.eat("/"_sr);
            if (address.endsWith(std::string(azure::storage_lite::constants::default_endpoint_suffix))) {
                CODE_PROBE(true, "Azure backup url with standard azure storage account endpoint");
                // <account>.<service>.core.windows.net/<resource_path>
                auto endPoint = address.toString();
                auto accountName = address.eat("."_sr).toString();
                auto containerName = u.eat("/"_sr).toString();
                r = makeReference<BackupContainerAzureBlobStore>(
                    endPoint, accountName, containerName, encryptionKeyFileName);
            } else {
                // resolve the network address if necessary
                std::string endpoint(address.toString());
                Optional<NetworkAddress> parsedAddress = NetworkAddress::parseOptional(endpoint);
                if (!parsedAddress.present()) {
                    try {
                        auto hostname = Hostname::parse(endpoint);
                        auto resolvedAddress = hostname.resolveBlocking();
                        if (resolvedAddress.present()) {
                            CODE_PROBE(true, "Azure backup url with hostname in the endpoint");
                            parsedAddress = resolvedAddress.get();
                        }
                    } catch (Error& e) {
                        TraceEvent(SevError, "InvalidAzureBackupUrl").error(e).detail("Endpoint", endpoint);
                        throw backup_invalid_url();
                    }
                }
                if (!parsedAddress.present()) {
                    TraceEvent(SevError, "InvalidAzureBackupUrl").detail("Endpoint", endpoint);
                    throw backup_invalid_url();
                }
                auto accountName = u.eat("/"_sr).toString();
                // Avoid including ":tls" and "(fromHostname)"
                // note: the endpoint needs to contain the account name
                // so either "<account_name>.blob.core.windows.net" or "<ip>:<port>/<account_name>"
                endpoint =
                    fmt::format("{}/{}", formatIpPort(parsedAddress.get().ip, parsedAddress.get().port), accountName);
                auto containerName = u.eat("/"_sr).toString();
                r = makeReference<BackupContainerAzureBlobStore>(
                    endpoint, accountName, containerName, encryptionKeyFileName);
            }
        }
#endif
        else {
            lastOpenError = "invalid URL prefix";
            throw backup_invalid_url();
        }

        r->encryptionKeyFileName = encryptionKeyFileName;
        r->URL = url;
        return r;
    } catch (Error& e) {
        if (e.code() == error_code_actor_cancelled)
            throw;

        TraceEvent m(SevWarn, "BackupContainer");
        m.error(e);
        m.detail("Description", "Invalid container specification. See help.");
        m.detail("URL", url);
        if (e.code() == error_code_backup_invalid_url)
            m.detail("LastOpenError", lastOpenError);

        throw;
    }
}

namespace backup_test {

int chooseFileSize(std::vector<int>& sizes) {
    if (!sizes.empty()) {
        int size = sizes.back();
        sizes.pop_back();
        return size;
    }
    return deterministicRandom()->randomInt(0, 2e6);
}

ACTOR Future<Void> writeAndVerifyFile(Reference<IBackupContainer> c,
                                      Reference<IBackupFile> f,
                                      int size,
                                      FlowLock* lock) {
    state Standalone<VectorRef<uint8_t>> content;

    wait(lock->take(TaskPriority::DefaultYield, size));
    state FlowLock::Releaser releaser(*lock, size);

    printf("writeAndVerify size=%d file=%s\n", size, f->getFileName().c_str());
    content.resize(content.arena(), size);
    for (int i = 0; i < content.size(); ++i) {
        content[i] = (uint8_t)deterministicRandom()->randomInt(0, 256);
    }

    state VectorRef<uint8_t> sendBuf = content;
    while (sendBuf.size() > 0) {
        state int n = std::min(sendBuf.size(), deterministicRandom()->randomInt(1, 16384));
        wait(f->append(sendBuf.begin(), n));
        sendBuf.pop_front(n);
    }
    wait(f->finish());

    state Reference<IAsyncFile> inputFile = wait(c->readFile(f->getFileName()));
    int64_t fileSize = wait(inputFile->size());
    ASSERT_EQ(size, fileSize);
    if (size > 0) {
        state Standalone<VectorRef<uint8_t>> buf;
        buf.resize(buf.arena(), fileSize);
        int b = wait(inputFile->read(buf.begin(), buf.size(), 0));
        ASSERT_EQ(b, buf.size());
        ASSERT(buf == content);
    }
    return Void();
}

// Randomly advance version by up to 1 second of versions
Version nextVersion(Version v) {
    int64_t increment = deterministicRandom()->randomInt64(1, CLIENT_KNOBS->CORE_VERSIONSPERSECOND);
    return v + increment;
}

// Write a snapshot file with only begin & end key
ACTOR static Future<Void> testWriteSnapshotFile(Reference<IBackupFile> file, Key begin, Key end, uint32_t blockSize) {
    ASSERT_GT(blockSize, 3 * sizeof(uint32_t) + begin.size() + end.size());

    uint32_t fileVersion = BACKUP_AGENT_SNAPSHOT_FILE_VERSION;
    // write Header
    wait(file->append((uint8_t*)&fileVersion, sizeof(fileVersion)));

    // write begin key length and key
    wait(file->appendStringRefWithLen(begin));

    // write end key length and key
    wait(file->appendStringRefWithLen(end));

    int bytesLeft = blockSize - file->size();
    if (bytesLeft > 0) {
        Value paddings = fileBackup::makePadding(bytesLeft);
        wait(file->append(paddings.begin(), bytesLeft));
    }
    wait(file->finish());
    return Void();
}
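
// The block layout written above: a uint32_t file version, then for the begin and end keys a uint32_t length
// followed by the key bytes, then padding out to blockSize. This is why the assertion requires blockSize to
// exceed 3 * sizeof(uint32_t) plus both key sizes.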

ACTOR Future<Void> testBackupContainer(std::string url,
                                       Optional<std::string> proxy,
                                       Optional<std::string> encryptionKeyFileName) {
    state FlowLock lock(100e6);

    if (encryptionKeyFileName.present()) {
        wait(BackupContainerFileSystem::createTestEncryptionKeyFile(encryptionKeyFileName.get()));
    }

    printf("BackupContainerTest URL %s\n", url.c_str());

    state Reference<IBackupContainer> c = IBackupContainer::openContainer(url, proxy, encryptionKeyFileName);

    // Make sure container doesn't exist, then create it.
    try {
        wait(c->deleteContainer());
    } catch (Error& e) {
        if (e.code() != error_code_backup_invalid_url && e.code() != error_code_backup_does_not_exist)
            throw;
    }

    wait(c->create());

    state std::vector<Future<Void>> writes;
    state std::map<Version, std::vector<std::string>> snapshots;
    state std::map<Version, int64_t> snapshotSizes;
    state std::map<Version, std::vector<std::pair<Key, Key>>> snapshotBeginEndKeys;
    state int nRangeFiles = 0;
    state std::map<Version, std::string> logs;
    state Version v = deterministicRandom()->randomInt64(0, std::numeric_limits<Version>::max() / 2);

    // List of sizes to use to test edge cases on underlying file implementations
    state std::vector<int> fileSizes = { 0 };
    if (StringRef(url).startsWith("blob"_sr)) {
        fileSizes.push_back(CLIENT_KNOBS->BLOBSTORE_MULTIPART_MIN_PART_SIZE);
        fileSizes.push_back(CLIENT_KNOBS->BLOBSTORE_MULTIPART_MIN_PART_SIZE + 10);
    }

    loop {
        state Version logStart = v;
        state int kvfiles = deterministicRandom()->randomInt(0, 3);
        state Key begin = ""_sr;
        state Key end = ""_sr;
        state int blockSize = 3 * sizeof(uint32_t) + begin.size() + end.size() + 8;

        while (kvfiles > 0) {
            if (snapshots.empty()) {
                snapshots[v] = {};
                snapshotBeginEndKeys[v] = {};
                snapshotSizes[v] = 0;
                if (deterministicRandom()->coinflip()) {
                    v = nextVersion(v);
                }
            }
            Reference<IBackupFile> range = wait(c->writeRangeFile(snapshots.rbegin()->first, 0, v, blockSize));
            ++nRangeFiles;
            v = nextVersion(v);
            snapshots.rbegin()->second.push_back(range->getFileName());
            snapshotBeginEndKeys.rbegin()->second.emplace_back(begin, end);

            int size = chooseFileSize(fileSizes);
            snapshotSizes.rbegin()->second += size;
            // Write in actual range file format, instead of random data.
            // writes.push_back(writeAndVerifyFile(c, range, size, &lock));
            wait(testWriteSnapshotFile(range, begin, end, blockSize));

            if (deterministicRandom()->random01() < .2) {
                writes.push_back(c->writeKeyspaceSnapshotFile(snapshots.rbegin()->second,
                                                              snapshotBeginEndKeys.rbegin()->second,
                                                              snapshotSizes.rbegin()->second,
                                                              IncludeKeyRangeMap(BUGGIFY)));
                snapshots[v] = {};
                snapshotBeginEndKeys[v] = {};
                snapshotSizes[v] = 0;
                break;
            }

            --kvfiles;
        }

        if (logStart == v || deterministicRandom()->coinflip()) {
            v = nextVersion(v);
        }
        state Reference<IBackupFile> log = wait(c->writeLogFile(logStart, v, 10));
        logs[logStart] = log->getFileName();
        int size = chooseFileSize(fileSizes);
        writes.push_back(writeAndVerifyFile(c, log, size, &lock));

        // Randomly stop after a snapshot has finished and all manually seeded file sizes have been used.
        if (fileSizes.empty() && !snapshots.empty() && snapshots.rbegin()->second.empty() &&
            deterministicRandom()->random01() < .2) {
            snapshots.erase(snapshots.rbegin()->first);
            break;
        }
    }

    wait(waitForAll(writes));

    state BackupFileList listing = wait(c->dumpFileList());
    ASSERT_EQ(listing.ranges.size(), nRangeFiles);
    ASSERT_EQ(listing.logs.size(), logs.size());
    ASSERT_EQ(listing.snapshots.size(), snapshots.size());

    state BackupDescription desc = wait(c->describeBackup());
    printf("\n%s\n", desc.toString().c_str());

    // Do a series of expirations and verify resulting state
    state int i = 0;
    for (; i < listing.snapshots.size(); ++i) {
        {
            // Ensure we can still restore to the latest version
            Optional<RestorableFileSet> rest = wait(c->getRestoreSet(desc.maxRestorableVersion.get()));
            ASSERT(rest.present());
        }

        {
            // Ensure we can restore to the end version of snapshot i
            Optional<RestorableFileSet> rest = wait(c->getRestoreSet(listing.snapshots[i].endVersion));
            ASSERT(rest.present());
        }

        // Test expiring to the end of this snapshot
        state Version expireVersion = listing.snapshots[i].endVersion;

        // Expire everything up to but not including the snapshot end version
        fmt::print("EXPIRE TO {}\n", expireVersion);
        state Future<Void> f = c->expireData(expireVersion);
        wait(ready(f));

        // If there is an error, it must be backup_cannot_expire and we have to be on the last snapshot
        if (f.isError()) {
            ASSERT_EQ(f.getError().code(), error_code_backup_cannot_expire);
            ASSERT_EQ(i, listing.snapshots.size() - 1);
            wait(c->expireData(expireVersion, true));
        }

        BackupDescription d = wait(c->describeBackup());
        printf("\n%s\n", d.toString().c_str());
    }

    printf("DELETING\n");
    wait(c->deleteContainer());

    state Future<BackupDescription> d = c->describeBackup();
    wait(ready(d));
    ASSERT(d.isError() && d.getError().code() == error_code_backup_does_not_exist);

    BackupFileList empty = wait(c->dumpFileList());
    ASSERT_EQ(empty.ranges.size(), 0);
    ASSERT_EQ(empty.logs.size(), 0);
    ASSERT_EQ(empty.snapshots.size(), 0);

    printf("BackupContainerTest URL=%s PASSED.\n", url.c_str());

    return Void();
}

TEST_CASE("/backup/containers/localdir/unencrypted") {
    wait(testBackupContainer(format("file://%s/fdb_backups/%llx", params.getDataDir().c_str(), timer_int()), {}, {}));
    return Void();
}

TEST_CASE("/backup/containers/localdir/encrypted") {
    wait(testBackupContainer(format("file://%s/fdb_backups/%llx", params.getDataDir().c_str(), timer_int()),
                             {},
                             format("%s/test_encryption_key", params.getDataDir().c_str())));
    return Void();
}

TEST_CASE("/backup/containers/url") {
    if (!g_network->isSimulated()) {
        const char* url = getenv("FDB_TEST_BACKUP_URL");
        ASSERT(url != nullptr);
        wait(testBackupContainer(url, {}, {}));
    }
    return Void();
}

TEST_CASE("/backup/containers_list") {
    if (!g_network->isSimulated()) {
        state const char* url = getenv("FDB_TEST_BACKUP_URL");
        ASSERT(url != nullptr);
        printf("Listing %s\n", url);
        std::vector<std::string> urls = wait(IBackupContainer::listContainers(url, {}));
        for (auto& u : urls) {
            printf("%s\n", u.c_str());
        }
    }
    return Void();
}

TEST_CASE("/backup/time") {
    // test formatTime()
    for (int i = 0; i < 1000; ++i) {
        int64_t ts = deterministicRandom()->randomInt64(0, std::numeric_limits<int32_t>::max());
        ASSERT(BackupAgentBase::parseTime(BackupAgentBase::formatTime(ts)) == ts);
    }

    ASSERT(BackupAgentBase::parseTime("2019/03/18.17:51:11-0600") ==
           BackupAgentBase::parseTime("2019/03/18.16:51:11-0700"));
    ASSERT(BackupAgentBase::parseTime("2019/03/31.22:45:07-0700") ==
           BackupAgentBase::parseTime("2019/04/01.03:45:07-0200"));
    ASSERT(BackupAgentBase::parseTime("2019/03/31.22:45:07+0000") ==
           BackupAgentBase::parseTime("2019/04/01.03:45:07+0500"));
    ASSERT(BackupAgentBase::parseTime("2019/03/31.22:45:07+0030") ==
           BackupAgentBase::parseTime("2019/04/01.03:45:07+0530"));
    ASSERT(BackupAgentBase::parseTime("2019/03/31.22:45:07+0030") ==
           BackupAgentBase::parseTime("2019/04/01.04:00:07+0545"));

    return Void();
}

TEST_CASE("/backup/continuous") {
    std::vector<LogFile> files;

    // [0, 100) 2 tags
    files.push_back({ 0, 100, 10, "file1", 100, 0, 2 }); // Tag 0: 0-100
    ASSERT(!BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 0, 99));
    ASSERT(BackupContainerFileSystemImpl::getPartitionedLogsContinuousEndVersion(files, 0) == 0);

    files.push_back({ 0, 100, 10, "file2", 200, 1, 2 }); // Tag 1: 0-100
    std::sort(files.begin(), files.end());
    ASSERT(BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 0, 99));
    ASSERT(!BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 0, 100));
    ASSERT(BackupContainerFileSystemImpl::getPartitionedLogsContinuousEndVersion(files, 0) == 99);

    // [100, 300) 3 tags
    files.push_back({ 100, 200, 10, "file3", 200, 0, 3 }); // Tag 0: 100-200
    files.push_back({ 100, 250, 10, "file4", 200, 1, 3 }); // Tag 1: 100-250
    std::sort(files.begin(), files.end());
    ASSERT(BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 0, 99));
    ASSERT(!BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 0, 100));
    ASSERT(!BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 50, 150));
    ASSERT(BackupContainerFileSystemImpl::getPartitionedLogsContinuousEndVersion(files, 0) == 99);

    files.push_back({ 100, 300, 10, "file5", 200, 2, 3 }); // Tag 2: 100-300
    std::sort(files.begin(), files.end());
    ASSERT(BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 50, 150));
    ASSERT(!BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 50, 200));
    ASSERT(BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 10, 199));
    ASSERT(BackupContainerFileSystemImpl::getPartitionedLogsContinuousEndVersion(files, 0) == 199);
    ASSERT(BackupContainerFileSystemImpl::getPartitionedLogsContinuousEndVersion(files, 100) == 199);

    files.push_back({ 250, 300, 10, "file6", 200, 0, 3 }); // Tag 0: 250-300, missing 200-250
    std::sort(files.begin(), files.end());
    ASSERT(!BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 50, 240));
    ASSERT(!BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 100, 280));
    ASSERT(BackupContainerFileSystemImpl::getPartitionedLogsContinuousEndVersion(files, 99) == 199);

    files.push_back({ 250, 300, 10, "file7", 200, 1, 3 }); // Tag 1: 250-300
    std::sort(files.begin(), files.end());
    ASSERT(!BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 100, 280));

    files.push_back({ 200, 250, 10, "file8", 200, 0, 3 }); // Tag 0: 200-250
    std::sort(files.begin(), files.end());
    ASSERT(BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 0, 299));
    ASSERT(BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 100, 280));
    ASSERT(BackupContainerFileSystemImpl::getPartitionedLogsContinuousEndVersion(files, 150) == 299);

    // [300, 400) 1 tag
    // files.push_back({200, 250, 10, "file9", 200, 0, 3}); // Tag 0: 200-250, duplicate file
    files.push_back({ 300, 400, 10, "file10", 200, 0, 1 }); // Tag 0: 300-400
    std::sort(files.begin(), files.end());
    ASSERT(BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 0, 399));
    ASSERT(BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 100, 399));
    ASSERT(BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 150, 399));
    ASSERT(BackupContainerFileSystemImpl::isPartitionedLogsContinuous(files, 250, 399));
    ASSERT(BackupContainerFileSystemImpl::getPartitionedLogsContinuousEndVersion(files, 0) == 399);
    ASSERT(BackupContainerFileSystemImpl::getPartitionedLogsContinuousEndVersion(files, 99) == 399);
    ASSERT(BackupContainerFileSystemImpl::getPartitionedLogsContinuousEndVersion(files, 250) == 399);

    return Void();
}

} // namespace backup_test