2019-10-17 13:44:47 +08:00
|
|
|
/*
|
|
|
|
* FileConverter.actor.cpp
|
|
|
|
*
|
|
|
|
* This source file is part of the FoundationDB open source project
|
|
|
|
*
|
|
|
|
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "fdbbackup/FileConverter.h"
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <iostream>
|
|
|
|
#include <cinttypes>
|
|
|
|
#include <cstdio>
|
|
|
|
#include <vector>
|
|
|
|
|
2019-10-26 07:01:09 +08:00
|
|
|
#include "fdbclient/BackupAgent.actor.h"
|
2019-10-17 13:44:47 +08:00
|
|
|
#include "fdbclient/BackupContainer.h"
|
2019-11-16 05:44:43 +08:00
|
|
|
#include "fdbclient/MutationList.h"
|
2019-10-17 13:44:47 +08:00
|
|
|
#include "flow/flow.h"
|
2019-10-26 07:01:09 +08:00
|
|
|
#include "flow/serialize.h"
|
2020-09-11 08:02:24 +08:00
|
|
|
#include "fdbclient/BuildFlags.h"
|
2020-02-04 02:42:05 +08:00
|
|
|
#include "flow/actorcompiler.h" // has to be last include
|
2019-10-17 13:44:47 +08:00
|
|
|
|
2019-10-27 00:27:29 +08:00
|
|
|
namespace file_converter {
|
|
|
|
|
2019-10-17 13:44:47 +08:00
|
|
|
// Prints the converter's command-line help text to stdout, one entry of the
// table below per output chunk and in table order.
void printConvertUsage() {
	static const char* const kUsageText[] = {
		"\n",
		" -r, --container Container URL.\n",
		" -b, --begin BEGIN\n",
		" Begin version.\n",
		" -e, --end END End version.\n",
		" --log Enables trace file logging for the CLI session.\n",
		" --logdir PATH Specifes the output directory for trace files. If\n",
		" unspecified, defaults to the current directory. Has\n",
		" no effect unless --log is specified.\n",
		" --loggroup LOG_GROUP\n",
		" Sets the LogGroup field with the specified value for all\n",
		" events in the trace output (defaults to `default').\n",
		" --trace_format FORMAT\n",
		" Select the format of the trace files. xml (the default) and json are supported.\n",
		" Has no effect unless --log is specified.\n",
		" --build_flags Print build information and exit.\n",
		" -h, --help Display this help and exit.\n",
		"\n",
	};

	for (const char* chunk : kUsageText) {
		std::cout << chunk;
	}
}
|
|
|
|
|
2020-09-11 08:02:24 +08:00
|
|
|
// Writes the string returned by jsonBuildInformation() to stdout verbatim
// (no trailing newline is added).
void printBuildInformation() {
	const auto buildInfo = jsonBuildInformation();
	fputs(buildInfo.c_str(), stdout);
}
|
|
|
|
|
2019-11-21 02:56:40 +08:00
|
|
|
void printLogFiles(std::string msg, const std::vector<LogFile>& files) {
|
|
|
|
std::cout << msg << " " << files.size() << " log files\n";
|
|
|
|
for (const auto& file : files) {
|
|
|
|
std::cout << file.toString() << "\n";
|
|
|
|
}
|
|
|
|
std::cout << std::endl;
|
|
|
|
}
|
|
|
|
|
2019-10-17 13:44:47 +08:00
|
|
|
std::vector<LogFile> getRelevantLogFiles(const std::vector<LogFile>& files, Version begin, Version end) {
|
2019-11-21 02:56:40 +08:00
|
|
|
std::vector<LogFile> filtered;
|
2019-10-17 13:44:47 +08:00
|
|
|
for (const auto& file : files) {
|
2020-03-11 07:14:35 +08:00
|
|
|
if (file.beginVersion <= end && file.endVersion >= begin && file.tagId >= 0 && file.fileSize > 0) {
|
2019-11-21 02:56:40 +08:00
|
|
|
filtered.push_back(file);
|
2019-10-17 13:44:47 +08:00
|
|
|
}
|
|
|
|
}
|
2019-11-21 02:56:40 +08:00
|
|
|
std::sort(filtered.begin(), filtered.end());
|
2019-10-17 13:44:47 +08:00
|
|
|
|
2019-11-21 02:56:40 +08:00
|
|
|
// Remove duplicates. This is because backup workers may store the log for
|
|
|
|
// old epochs successfully, but do not update the progress before another
|
2020-03-11 07:14:35 +08:00
|
|
|
// recovery happened. As a result, next epoch will retry and creates
|
2019-11-21 02:56:40 +08:00
|
|
|
// duplicated log files.
|
|
|
|
std::vector<LogFile> sorted;
|
|
|
|
int i = 0;
|
|
|
|
for (int j = 1; j < filtered.size(); j++) {
|
2020-03-05 02:52:51 +08:00
|
|
|
if (!filtered[i].isSubset(filtered[j])) {
|
2019-11-21 02:56:40 +08:00
|
|
|
sorted.push_back(filtered[i]);
|
|
|
|
}
|
2020-03-05 02:52:51 +08:00
|
|
|
i = j;
|
2019-10-17 13:44:47 +08:00
|
|
|
}
|
2019-11-21 02:56:40 +08:00
|
|
|
if (i < filtered.size()) {
|
|
|
|
sorted.push_back(filtered[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
return sorted;
|
2019-10-17 13:44:47 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
struct ConvertParams {
|
|
|
|
std::string container_url;
|
2019-10-25 05:51:06 +08:00
|
|
|
Version begin = invalidVersion;
|
|
|
|
Version end = invalidVersion;
|
|
|
|
bool log_enabled = false;
|
|
|
|
std::string log_dir, trace_format, trace_log_group;
|
|
|
|
|
|
|
|
bool isValid() { return begin != invalidVersion && end != invalidVersion && !container_url.empty(); }
|
2019-10-17 13:44:47 +08:00
|
|
|
|
|
|
|
std::string toString() {
|
|
|
|
std::string s;
|
|
|
|
s.append("ContainerURL:");
|
|
|
|
s.append(container_url);
|
|
|
|
s.append(" Begin:");
|
|
|
|
s.append(format("%" PRId64, begin));
|
|
|
|
s.append(" End:");
|
|
|
|
s.append(format("%" PRId64, end));
|
2019-10-25 05:51:06 +08:00
|
|
|
if (log_enabled) {
|
|
|
|
if (!log_dir.empty()) {
|
|
|
|
s.append(" LogDir:").append(log_dir);
|
|
|
|
}
|
|
|
|
if (!trace_format.empty()) {
|
|
|
|
s.append(" Format:").append(trace_format);
|
|
|
|
}
|
|
|
|
if (!trace_log_group.empty()) {
|
|
|
|
s.append(" LogGroup:").append(trace_log_group);
|
|
|
|
}
|
|
|
|
}
|
2019-10-17 13:44:47 +08:00
|
|
|
return s;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-11-16 05:44:43 +08:00
|
|
|
struct VersionedData {
|
|
|
|
LogMessageVersion version;
|
2019-11-21 00:58:45 +08:00
|
|
|
StringRef message; // Serialized mutation.
|
2019-11-16 05:44:43 +08:00
|
|
|
Arena arena; // The arena that contains mutation.
|
|
|
|
|
|
|
|
VersionedData() : version(invalidVersion, -1) {}
|
2019-11-21 00:58:45 +08:00
|
|
|
VersionedData(LogMessageVersion v, StringRef m, Arena a) : version(v), message(m), arena(a) {}
|
2019-11-16 05:44:43 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct MutationFilesReadProgress : public ReferenceCounted<MutationFilesReadProgress> {
|
|
|
|
MutationFilesReadProgress(std::vector<LogFile>& logs, Version begin, Version end)
|
|
|
|
: files(logs), beginVersion(begin), endVersion(end) {}
|
|
|
|
|
|
|
|
struct FileProgress : public ReferenceCounted<FileProgress> {
|
2019-11-22 07:21:01 +08:00
|
|
|
FileProgress(Reference<IAsyncFile> f, int index) : fd(f), idx(index), offset(0), eof(false) {}
|
2019-11-16 05:44:43 +08:00
|
|
|
|
|
|
|
bool operator<(const FileProgress& rhs) const {
|
2021-03-11 02:06:03 +08:00
|
|
|
if (rhs.mutations.empty())
|
|
|
|
return true;
|
|
|
|
if (mutations.empty())
|
|
|
|
return false;
|
2019-11-16 05:44:43 +08:00
|
|
|
return mutations[0].version < rhs.mutations[0].version;
|
|
|
|
}
|
2019-11-21 00:58:45 +08:00
|
|
|
bool operator<=(const FileProgress& rhs) const {
|
2021-03-11 02:06:03 +08:00
|
|
|
if (rhs.mutations.empty())
|
|
|
|
return true;
|
|
|
|
if (mutations.empty())
|
|
|
|
return false;
|
2019-11-21 00:58:45 +08:00
|
|
|
return mutations[0].version <= rhs.mutations[0].version;
|
|
|
|
}
|
2019-11-16 05:44:43 +08:00
|
|
|
bool empty() { return eof && mutations.empty(); }
|
|
|
|
|
2019-11-21 12:32:15 +08:00
|
|
|
// Decodes the block into mutations and save them if >= minVersion and < maxVersion.
|
2019-11-16 12:30:43 +08:00
|
|
|
// Returns true if new mutations has been saved.
|
2019-11-21 12:32:15 +08:00
|
|
|
bool decodeBlock(const Standalone<StringRef>& buf, int len, Version minVersion, Version maxVersion) {
|
2019-11-16 12:30:43 +08:00
|
|
|
StringRef block(buf.begin(), len);
|
|
|
|
StringRefReader reader(block, restore_corrupted_data());
|
2019-11-22 07:21:01 +08:00
|
|
|
int count = 0, inserted = 0;
|
|
|
|
Version msgVersion = invalidVersion;
|
2019-11-16 12:30:43 +08:00
|
|
|
|
|
|
|
try {
|
2020-02-15 03:27:02 +08:00
|
|
|
// Read block header
|
2021-03-11 02:06:03 +08:00
|
|
|
if (reader.consume<int32_t>() != PARTITIONED_MLOG_VERSION)
|
|
|
|
throw restore_unsupported_file_version();
|
2020-02-15 03:27:02 +08:00
|
|
|
|
2019-11-16 12:30:43 +08:00
|
|
|
while (1) {
|
|
|
|
// If eof reached or first key len bytes is 0xFF then end of block was reached.
|
2021-03-11 02:06:03 +08:00
|
|
|
if (reader.eof() || *reader.rptr == 0xFF)
|
|
|
|
break;
|
2019-11-16 12:30:43 +08:00
|
|
|
|
|
|
|
// Deserialize messages written in saveMutationsToFile().
|
2019-11-22 07:59:23 +08:00
|
|
|
msgVersion = bigEndian64(reader.consume<Version>());
|
|
|
|
uint32_t sub = bigEndian32(reader.consume<uint32_t>());
|
|
|
|
int msgSize = bigEndian32(reader.consume<int>());
|
2019-11-16 12:30:43 +08:00
|
|
|
const uint8_t* message = reader.consume(msgSize);
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
ArenaReader rd(
|
|
|
|
buf.arena(), StringRef(message, msgSize), AssumeVersion(g_network->protocolVersion()));
|
2019-11-16 12:30:43 +08:00
|
|
|
MutationRef m;
|
|
|
|
rd >> m;
|
|
|
|
count++;
|
2019-11-21 12:32:15 +08:00
|
|
|
if (msgVersion >= maxVersion) {
|
|
|
|
TraceEvent("FileDecodeEnd")
|
|
|
|
.detail("MaxV", maxVersion)
|
|
|
|
.detail("Version", msgVersion)
|
|
|
|
.detail("File", fd->getFilename());
|
|
|
|
eof = true;
|
|
|
|
break; // skip
|
|
|
|
}
|
2019-11-16 12:30:43 +08:00
|
|
|
if (msgVersion >= minVersion) {
|
2021-03-11 02:06:03 +08:00
|
|
|
mutations.emplace_back(
|
|
|
|
LogMessageVersion(msgVersion, sub), StringRef(message, msgSize), buf.arena());
|
2019-11-16 12:30:43 +08:00
|
|
|
inserted++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
offset += len;
|
|
|
|
|
|
|
|
TraceEvent("Decoded")
|
|
|
|
.detail("Name", fd->getFilename())
|
|
|
|
.detail("Count", count)
|
|
|
|
.detail("Insert", inserted)
|
2019-11-22 07:21:01 +08:00
|
|
|
.detail("BlockOffset", reader.rptr - buf.begin())
|
2019-11-16 12:30:43 +08:00
|
|
|
.detail("Total", mutations.size())
|
2019-11-22 07:21:01 +08:00
|
|
|
.detail("EOF", eof)
|
|
|
|
.detail("Version", msgVersion)
|
2019-11-16 12:30:43 +08:00
|
|
|
.detail("NewOffset", offset);
|
|
|
|
return inserted > 0;
|
|
|
|
} catch (Error& e) {
|
|
|
|
TraceEvent(SevWarn, "CorruptLogFileBlock")
|
|
|
|
.error(e)
|
|
|
|
.detail("Filename", fd->getFilename())
|
|
|
|
.detail("BlockOffset", offset)
|
|
|
|
.detail("BlockLen", len)
|
|
|
|
.detail("ErrorRelativeOffset", reader.rptr - buf.begin())
|
|
|
|
.detail("ErrorAbsoluteOffset", reader.rptr - buf.begin() + offset);
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-11-16 05:44:43 +08:00
|
|
|
Reference<IAsyncFile> fd;
|
2019-11-22 07:21:01 +08:00
|
|
|
int idx; // index in the MutationFilesReadProgress::files vector
|
2019-11-16 05:44:43 +08:00
|
|
|
int64_t offset; // offset of the file to be read
|
|
|
|
bool eof; // If EOF is seen so far or endVersion is encountered. If true, the file can't be read further.
|
2019-11-16 12:30:43 +08:00
|
|
|
std::vector<VersionedData> mutations; // Buffered mutations read so far
|
2019-11-16 05:44:43 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
bool hasMutations() {
|
|
|
|
for (const auto& fp : fileProgress) {
|
2021-03-11 02:06:03 +08:00
|
|
|
if (!fp->empty())
|
|
|
|
return true;
|
2019-11-16 05:44:43 +08:00
|
|
|
}
|
|
|
|
return false;
|
2019-10-26 07:01:09 +08:00
|
|
|
}
|
|
|
|
|
2019-11-16 05:44:43 +08:00
|
|
|
void dumpProgress(std::string msg) {
|
2019-11-22 07:21:01 +08:00
|
|
|
std::cout << msg << "\n ";
|
2020-07-29 02:30:26 +08:00
|
|
|
for (const auto& fp : fileProgress) {
|
2019-11-16 05:44:43 +08:00
|
|
|
std::cout << fp->fd->getFilename() << " " << fp->mutations.size() << " mutations";
|
|
|
|
if (fp->mutations.size() > 0) {
|
|
|
|
std::cout << ", range " << fp->mutations[0].version.toString() << " "
|
|
|
|
<< fp->mutations.back().version.toString() << "\n";
|
2019-11-22 07:21:01 +08:00
|
|
|
} else {
|
|
|
|
std::cout << "\n\n";
|
2019-11-16 05:44:43 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-10-26 07:01:09 +08:00
|
|
|
|
2019-11-16 05:44:43 +08:00
|
|
|
// Sorts files according to their first mutation version and removes files without mutations.
|
|
|
|
void sortAndRemoveEmpty() {
|
2021-03-11 02:06:03 +08:00
|
|
|
std::sort(fileProgress.begin(),
|
|
|
|
fileProgress.end(),
|
2019-11-16 05:44:43 +08:00
|
|
|
[](const Reference<FileProgress>& a, const Reference<FileProgress>& b) { return (*a) < (*b); });
|
|
|
|
while (!fileProgress.empty() && fileProgress.back()->empty()) {
|
|
|
|
fileProgress.pop_back();
|
|
|
|
}
|
2019-10-26 07:01:09 +08:00
|
|
|
}
|
|
|
|
|
2019-11-16 05:44:43 +08:00
|
|
|
// Requires hasMutations() return true before calling this function.
|
|
|
|
// The caller must hold on the the arena associated with the mutation.
|
|
|
|
Future<VersionedData> getNextMutation() { return getMutationImpl(this); }
|
2019-10-26 07:01:09 +08:00
|
|
|
|
2019-11-16 05:44:43 +08:00
|
|
|
ACTOR static Future<VersionedData> getMutationImpl(MutationFilesReadProgress* self) {
|
|
|
|
ASSERT(!self->fileProgress.empty() && !self->fileProgress[0]->mutations.empty());
|
2019-10-26 07:01:09 +08:00
|
|
|
|
2019-11-16 05:44:43 +08:00
|
|
|
state Reference<FileProgress> fp = self->fileProgress[0];
|
|
|
|
state VersionedData data = fp->mutations[0];
|
|
|
|
fp->mutations.erase(fp->mutations.begin());
|
|
|
|
if (fp->mutations.empty()) {
|
|
|
|
// decode one more block
|
2019-11-22 07:21:01 +08:00
|
|
|
wait(decodeToVersion(fp, /*version=*/0, self->endVersion, self->getLogFile(fp->idx)));
|
2019-11-16 05:44:43 +08:00
|
|
|
}
|
2019-10-26 07:01:09 +08:00
|
|
|
|
2019-11-16 05:44:43 +08:00
|
|
|
if (fp->empty()) {
|
|
|
|
self->fileProgress.erase(self->fileProgress.begin());
|
|
|
|
} else {
|
|
|
|
// Keep fileProgress sorted
|
|
|
|
for (int i = 1; i < self->fileProgress.size(); i++) {
|
2019-11-21 00:58:45 +08:00
|
|
|
if (*self->fileProgress[i - 1] <= *self->fileProgress[i]) {
|
|
|
|
break;
|
|
|
|
}
|
2019-11-16 05:44:43 +08:00
|
|
|
std::swap(self->fileProgress[i - 1], self->fileProgress[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return data;
|
|
|
|
}
|
2019-10-26 07:01:09 +08:00
|
|
|
|
2019-11-22 07:21:01 +08:00
|
|
|
LogFile& getLogFile(int index) { return files[index]; }
|
|
|
|
|
2019-11-16 05:44:43 +08:00
|
|
|
Future<Void> openLogFiles(Reference<IBackupContainer> container) { return openLogFilesImpl(this, container); }
|
|
|
|
|
|
|
|
// Opens log files in the progress and starts decoding until the beginVersion is seen.
|
|
|
|
ACTOR static Future<Void> openLogFilesImpl(MutationFilesReadProgress* progress,
|
|
|
|
Reference<IBackupContainer> container) {
|
|
|
|
state std::vector<Future<Reference<IAsyncFile>>> asyncFiles;
|
|
|
|
for (const auto& file : progress->files) {
|
|
|
|
asyncFiles.push_back(container->readFile(file.fileName));
|
2019-10-26 07:01:09 +08:00
|
|
|
}
|
2019-11-16 05:44:43 +08:00
|
|
|
wait(waitForAll(asyncFiles)); // open all files
|
|
|
|
|
|
|
|
// Attempt decode the first few blocks of log files until beginVersion is consumed
|
|
|
|
std::vector<Future<Void>> fileDecodes;
|
2019-11-22 07:21:01 +08:00
|
|
|
for (int i = 0; i < asyncFiles.size(); i++) {
|
2020-11-07 15:50:55 +08:00
|
|
|
auto fp = makeReference<FileProgress>(asyncFiles[i].get(), i);
|
2019-11-16 05:44:43 +08:00
|
|
|
progress->fileProgress.push_back(fp);
|
2019-11-22 07:21:01 +08:00
|
|
|
fileDecodes.push_back(
|
|
|
|
decodeToVersion(fp, progress->beginVersion, progress->endVersion, progress->getLogFile(i)));
|
2019-11-16 05:44:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
wait(waitForAll(fileDecodes));
|
|
|
|
|
|
|
|
progress->sortAndRemoveEmpty();
|
2019-10-26 07:01:09 +08:00
|
|
|
|
|
|
|
return Void();
|
|
|
|
}
|
2019-11-16 05:44:43 +08:00
|
|
|
|
2019-11-16 12:30:43 +08:00
|
|
|
// Decodes the file until EOF or an mutation >= minVersion and saves these mutations.
|
2019-11-21 12:32:15 +08:00
|
|
|
// Skip mutations >= maxVersion.
|
2021-03-11 02:06:03 +08:00
|
|
|
ACTOR static Future<Void> decodeToVersion(Reference<FileProgress> fp,
|
|
|
|
Version minVersion,
|
|
|
|
Version maxVersion,
|
2019-11-22 07:21:01 +08:00
|
|
|
LogFile file) {
|
2021-03-11 02:06:03 +08:00
|
|
|
if (fp->empty())
|
|
|
|
return Void();
|
2019-11-16 05:44:43 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (!fp->mutations.empty() && fp->mutations.back().version.version >= minVersion)
|
|
|
|
return Void();
|
2019-11-16 05:44:43 +08:00
|
|
|
|
2020-02-04 02:42:05 +08:00
|
|
|
state int64_t len;
|
2019-11-16 05:44:43 +08:00
|
|
|
try {
|
2019-11-22 07:21:01 +08:00
|
|
|
// Read block by block until we see the minVersion
|
2019-11-16 12:30:43 +08:00
|
|
|
loop {
|
2020-02-04 02:42:05 +08:00
|
|
|
len = std::min<int64_t>(file.blockSize, file.fileSize - fp->offset);
|
2019-11-22 07:21:01 +08:00
|
|
|
if (len == 0) {
|
|
|
|
fp->eof = true;
|
2019-11-16 05:44:43 +08:00
|
|
|
return Void();
|
|
|
|
}
|
2019-11-22 07:21:01 +08:00
|
|
|
|
|
|
|
state Standalone<StringRef> buf = makeString(len);
|
|
|
|
int rLen = wait(fp->fd->read(mutateString(buf), len, fp->offset));
|
2021-03-11 02:06:03 +08:00
|
|
|
if (len != rLen)
|
|
|
|
throw restore_bad_read();
|
2019-11-22 07:21:01 +08:00
|
|
|
|
|
|
|
TraceEvent("ReadFile")
|
|
|
|
.detail("Name", fp->fd->getFilename())
|
|
|
|
.detail("Length", rLen)
|
|
|
|
.detail("Offset", fp->offset);
|
2021-03-11 02:06:03 +08:00
|
|
|
if (fp->decodeBlock(buf, rLen, minVersion, maxVersion))
|
|
|
|
break;
|
2019-11-16 05:44:43 +08:00
|
|
|
}
|
|
|
|
return Void();
|
|
|
|
} catch (Error& e) {
|
2019-11-22 07:21:01 +08:00
|
|
|
TraceEvent(SevWarn, "CorruptedLogFileBlock")
|
2019-11-16 05:44:43 +08:00
|
|
|
.error(e)
|
2019-11-22 07:21:01 +08:00
|
|
|
.detail("Filename", fp->fd->getFilename())
|
|
|
|
.detail("BlockOffset", fp->offset)
|
2019-11-16 12:30:43 +08:00
|
|
|
.detail("BlockLen", len);
|
2019-11-16 05:44:43 +08:00
|
|
|
throw;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<LogFile> files;
|
|
|
|
const Version beginVersion, endVersion;
|
|
|
|
std::vector<Reference<FileProgress>> fileProgress;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Writes a log file in the old backup format, described in backup-dataFormat.md.
|
|
|
|
// This is similar to the LogFileWriter in FileBackupAgent.actor.cpp.
|
2019-11-21 12:32:15 +08:00
|
|
|
struct LogFileWriter {
|
|
|
|
LogFileWriter() : blockSize(-1) {}
|
2019-11-16 05:44:43 +08:00
|
|
|
LogFileWriter(Reference<IBackupFile> f, int bsize) : file(f), blockSize(bsize) {}
|
|
|
|
|
|
|
|
// Returns the block key, i.e., `Param1`, in the back file. The format is
|
|
|
|
// `hash_value|commitVersion|part`.
|
|
|
|
static Standalone<StringRef> getBlockKey(Version commitVersion, int part) {
|
|
|
|
const int32_t version = commitVersion / CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE;
|
|
|
|
|
|
|
|
BinaryWriter wr(Unversioned());
|
|
|
|
wr << (uint8_t)hashlittle(&version, sizeof(version), 0);
|
|
|
|
wr << bigEndian64(commitVersion);
|
|
|
|
wr << bigEndian32(part);
|
|
|
|
return wr.toValue();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Start a new block if needed, then write the key and value
|
|
|
|
ACTOR static Future<Void> writeKV_impl(LogFileWriter* self, Key k, Value v) {
|
|
|
|
// If key and value do not fit in this block, end it and start a new one
|
|
|
|
int toWrite = sizeof(int32_t) + k.size() + sizeof(int32_t) + v.size();
|
|
|
|
if (self->file->size() + toWrite > self->blockEnd) {
|
|
|
|
// Write padding if needed
|
|
|
|
int bytesLeft = self->blockEnd - self->file->size();
|
|
|
|
if (bytesLeft > 0) {
|
2020-04-28 04:59:45 +08:00
|
|
|
state Value paddingFFs = fileBackup::makePadding(bytesLeft);
|
2019-11-16 05:44:43 +08:00
|
|
|
wait(self->file->append(paddingFFs.begin(), bytesLeft));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set new blockEnd
|
|
|
|
self->blockEnd += self->blockSize;
|
|
|
|
|
|
|
|
// write Header
|
2019-11-26 13:00:13 +08:00
|
|
|
wait(self->file->append((uint8_t*)&BACKUP_AGENT_MLOG_VERSION, sizeof(BACKUP_AGENT_MLOG_VERSION)));
|
2019-11-16 05:44:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
wait(self->file->appendStringRefWithLen(k));
|
|
|
|
wait(self->file->appendStringRefWithLen(v));
|
|
|
|
|
|
|
|
// At this point we should be in whatever the current block is or the block size is too small
|
2021-03-11 02:06:03 +08:00
|
|
|
if (self->file->size() > self->blockEnd)
|
|
|
|
throw backup_bad_block_size();
|
2019-11-16 05:44:43 +08:00
|
|
|
|
|
|
|
return Void();
|
|
|
|
}
|
|
|
|
|
|
|
|
Future<Void> writeKV(Key k, Value v) { return writeKV_impl(this, k, v); }
|
|
|
|
|
|
|
|
// Adds a new mutation to an interal buffer and writes out when encountering
|
|
|
|
// a new commitVersion or exceeding the block size.
|
|
|
|
ACTOR static Future<Void> addMutation(LogFileWriter* self, Version commitVersion, MutationListRef mutations) {
|
|
|
|
state Standalone<StringRef> value = BinaryWriter::toValue(mutations, IncludeVersion());
|
|
|
|
|
|
|
|
state int part = 0;
|
|
|
|
for (; part * CLIENT_KNOBS->MUTATION_BLOCK_SIZE < value.size(); part++) {
|
|
|
|
StringRef partBuf = value.substr(
|
|
|
|
part * CLIENT_KNOBS->MUTATION_BLOCK_SIZE,
|
|
|
|
std::min(value.size() - part * CLIENT_KNOBS->MUTATION_BLOCK_SIZE, CLIENT_KNOBS->MUTATION_BLOCK_SIZE));
|
|
|
|
Standalone<StringRef> key = getBlockKey(commitVersion, part);
|
|
|
|
wait(writeKV_impl(self, key, partBuf));
|
|
|
|
}
|
|
|
|
return Void();
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
Reference<IBackupFile> file;
|
|
|
|
int blockSize;
|
|
|
|
int64_t blockEnd = 0;
|
|
|
|
};
|
2019-10-26 07:01:09 +08:00
|
|
|
|
2019-11-22 07:21:01 +08:00
|
|
|
// Reads the mutation log files of the container that overlap
// [params.begin, params.end], merges their mutations, and writes them into a
// single new log file in the same container. Mutations sharing a commit
// version are emitted together as one batch.
ACTOR Future<Void> convert(ConvertParams params) {
	state Reference<IBackupContainer> container = IBackupContainer::openContainer(params.container_url);
	state BackupFileList fileList = wait(container->dumpFileList());
	std::sort(fileList.logs.begin(), fileList.logs.end());
	TraceEvent("Container").detail("URL", params.container_url).detail("Logs", fileList.logs.size());
	state BackupDescription description = wait(container->describeBackup());
	std::cout << "\n" << description.toString() << "\n";

	std::vector<LogFile> relevantLogs = getRelevantLogFiles(fileList.logs, params.begin, params.end);
	printLogFiles("Range has", relevantLogs);

	state Reference<MutationFilesReadProgress> progress(new MutationFilesReadProgress(relevantLogs, params.begin, params.end));

	wait(progress->openLogFiles(container));

	state int blockSize = CLIENT_KNOBS->BACKUP_LOGFILE_BLOCK_SIZE;
	state Reference<IBackupFile> outFile = wait(container->writeLogFile(params.begin, params.end, blockSize));
	state LogFileWriter writer(outFile, blockSize);
	std::cout << "Output file: " << outFile->getFileName() << "\n";

	state MutationList batch; // mutations collected for batchVersion
	state Arena batchArena; // owns the deep copies held by batch
	state Version batchVersion = invalidVersion;
	while (progress->hasMutations()) {
		state VersionedData next = wait(progress->getNextMutation());

		// A new version starts: flush the batch collected so far and reset it
		if (batch.totalSize() > 0 && batchVersion != next.version.version) {
			wait(LogFileWriter::addMutation(&writer, batchVersion, batch));
			batch = MutationList();
			batchArena = Arena();
		}

		ArenaReader reader(next.arena, next.message, AssumeVersion(g_network->protocolVersion()));
		MutationRef mutation;
		reader >> mutation;
		std::cout << next.version.toString() << " m = " << mutation.toString() << "\n";
		batch.push_back_deep(batchArena, mutation);
		batchVersion = next.version.version;
	}

	// Flush whatever is left of the last version
	if (batch.totalSize() > 0) {
		wait(LogFileWriter::addMutation(&writer, batchVersion, batch));
	}

	wait(outFile->finish());

	return Void();
}
|
|
|
|
|
|
|
|
// Parses the command line into *param.
//
// Returns FDB_EXIT_SUCCESS once every option has been consumed. Returns
// FDB_EXIT_ERROR on an option-parsing error, an unparsable version, an
// unrecognized trace format, and also after handling --help or --build_flags
// (both print their output and stop parsing).
int parseCommandLine(ConvertParams* param, CSimpleOpt* args) {
	while (args->Next()) {
		if (args->LastError() != SO_SUCCESS) {
			std::cerr << "ERROR: argument given for option: " << args->OptionText() << "\n";
			return FDB_EXIT_ERROR;
		}

		int optId = args->OptionId();
		const char* arg = args->OptionArg();
		switch (optId) {
		case OPT_HELP:
			printConvertUsage();
			return FDB_EXIT_ERROR;

		case OPT_BEGIN_VERSION:
			// sscanf() returns EOF (-1) on empty input, which `!sscanf(...)`
			// would treat as success; require exactly one converted field.
			if (sscanf(arg, "%" SCNd64, &param->begin) != 1) {
				std::cerr << "ERROR: could not parse begin version " << arg << "\n";
				printConvertUsage();
				return FDB_EXIT_ERROR;
			}
			break;

		case OPT_END_VERSION:
			if (sscanf(arg, "%" SCNd64, &param->end) != 1) {
				std::cerr << "ERROR: could not parse end version " << arg << "\n";
				printConvertUsage();
				return FDB_EXIT_ERROR;
			}
			break;

		case OPT_CONTAINER:
			param->container_url = args->OptionArg();
			break;

		case OPT_TRACE:
			param->log_enabled = true;
			break;

		case OPT_TRACE_DIR:
			param->log_dir = args->OptionArg();
			break;

		case OPT_TRACE_FORMAT:
			if (!validateTraceFormat(args->OptionArg())) {
				std::cerr << "ERROR: Unrecognized trace format " << args->OptionArg() << "\n";
				return FDB_EXIT_ERROR;
			}
			param->trace_format = args->OptionArg();
			break;

		case OPT_TRACE_LOG_GROUP:
			param->trace_log_group = args->OptionArg();
			break;

		case OPT_BUILD_FLAGS:
			printBuildInformation();
			return FDB_EXIT_ERROR;
		}
	}
	return FDB_EXIT_SUCCESS;
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
} // namespace file_converter
|
2019-10-27 00:27:29 +08:00
|
|
|
|
2019-10-17 13:44:47 +08:00
|
|
|
int main(int argc, char** argv) {
|
|
|
|
try {
|
2019-10-27 00:27:29 +08:00
|
|
|
CSimpleOpt* args = new CSimpleOpt(argc, argv, file_converter::gConverterOptions, SO_O_EXACT);
|
|
|
|
file_converter::ConvertParams param;
|
|
|
|
int status = file_converter::parseCommandLine(¶m, args);
|
2019-10-17 13:44:47 +08:00
|
|
|
std::cout << "Params: " << param.toString() << "\n";
|
2019-10-25 05:51:06 +08:00
|
|
|
if (status != FDB_EXIT_SUCCESS || !param.isValid()) {
|
2019-10-27 00:27:29 +08:00
|
|
|
file_converter::printConvertUsage();
|
2019-10-25 05:51:06 +08:00
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (param.log_enabled) {
|
|
|
|
if (param.log_dir.empty()) {
|
|
|
|
setNetworkOption(FDBNetworkOptions::TRACE_ENABLE);
|
|
|
|
} else {
|
|
|
|
setNetworkOption(FDBNetworkOptions::TRACE_ENABLE, StringRef(param.log_dir));
|
|
|
|
}
|
|
|
|
if (!param.trace_format.empty()) {
|
|
|
|
setNetworkOption(FDBNetworkOptions::TRACE_FORMAT, StringRef(param.trace_format));
|
|
|
|
}
|
|
|
|
if (!param.trace_log_group.empty()) {
|
|
|
|
setNetworkOption(FDBNetworkOptions::TRACE_LOG_GROUP, StringRef(param.trace_log_group));
|
|
|
|
}
|
|
|
|
}
|
2019-10-17 13:44:47 +08:00
|
|
|
|
|
|
|
platformInit();
|
|
|
|
Error::init();
|
|
|
|
|
|
|
|
StringRef url(param.container_url);
|
2019-10-26 07:01:09 +08:00
|
|
|
setupNetwork(0, true);
|
2019-10-17 13:44:47 +08:00
|
|
|
|
|
|
|
TraceEvent::setNetworkThread();
|
2019-10-25 05:51:06 +08:00
|
|
|
openTraceFile(NetworkAddress(), 10 << 20, 10 << 20, param.log_dir, "convert", param.trace_log_group);
|
2019-10-17 13:44:47 +08:00
|
|
|
|
2019-11-22 07:21:01 +08:00
|
|
|
auto f = stopAfter(convert(param));
|
2019-10-26 07:01:09 +08:00
|
|
|
|
|
|
|
runNetwork();
|
2019-10-17 13:44:47 +08:00
|
|
|
return status;
|
2019-10-25 05:54:38 +08:00
|
|
|
} catch (Error& e) {
|
2019-10-17 13:44:47 +08:00
|
|
|
fprintf(stderr, "ERROR: %s\n", e.what());
|
|
|
|
return FDB_EXIT_ERROR;
|
|
|
|
} catch (std::exception& e) {
|
|
|
|
TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what());
|
|
|
|
return FDB_EXIT_MAIN_EXCEPTION;
|
|
|
|
}
|
2020-02-04 02:42:05 +08:00
|
|
|
}
|