diff --git a/fdbbackup/FileConverter.actor.cpp b/fdbbackup/FileConverter.actor.cpp
index 8e7623c7aa..0bc4aaa9fc 100644
--- a/fdbbackup/FileConverter.actor.cpp
+++ b/fdbbackup/FileConverter.actor.cpp
@@ -56,16 +56,6 @@ void printConvertUsage() {
 	return;
 }
 
-std::vector<LogFile> getRelevantLogFiles(const std::vector<LogFile>& files, Version begin, Version end) {
-	std::vector<LogFile> results;
-	for (const auto& file : files) {
-		if (file.beginVersion <= end && file.endVersion >= begin) {
-			results.push_back(file);
-		}
-	}
-	return results;
-}
-
 void printLogFiles(std::string msg, const std::vector<LogFile>& files) {
 	std::cout << msg << " " << files.size() << " log files\n";
 	for (const auto& file : files) {
@@ -74,6 +64,34 @@ void printLogFiles(std::string msg, const std::vector<LogFile>& files) {
 	std::cout << std::endl;
 }
 
+std::vector<LogFile> getRelevantLogFiles(const std::vector<LogFile>& files, Version begin, Version end) {
+	std::vector<LogFile> filtered;
+	for (const auto& file : files) {
+		if (file.beginVersion <= end && file.endVersion >= begin) {
+			filtered.push_back(file);
+		}
+	}
+	std::sort(filtered.begin(), filtered.end());
+
+	// Remove duplicates. This is because backup workers may store the log for
+	// old epochs successfully, but do not update the progress before another
+	// recovery happened. As a result, the next epoch will retry and create
+	// duplicated log files.
+	std::vector<LogFile> sorted;
+	int i = 0;
+	for (int j = 1; j < filtered.size(); j++) {
+		if (!filtered[i].sameContent(filtered[j])) {
+			sorted.push_back(filtered[i]);
+			i = j;
+		}
+	}
+	if (i < filtered.size()) {
+		sorted.push_back(filtered[i]);
+	}
+
+	return sorted;
+}
+
 struct ConvertParams {
 	std::string container_url;
 	Version begin = invalidVersion;
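
For readers who want to exercise the filter-sort-deduplicate behavior in isolation, the following is a minimal, self-contained sketch. LogFileStub, relevantLogFiles, and the Version alias are simplified stand-ins for the real LogFile struct and getRelevantLogFiles in fdbbackup (the real struct carries more fields and defines its own operator< and sameContent), and the deduplication is written as an equivalent adjacent-duplicate filter rather than the i/j loop in the patch.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

using Version = int64_t; // stand-in for FDB's Version type

// Hypothetical stand-in for fdbbackup's LogFile.
struct LogFileStub {
	Version beginVersion;
	Version endVersion;
	int tagId;

	bool operator<(const LogFileStub& rhs) const {
		if (beginVersion != rhs.beginVersion) return beginVersion < rhs.beginVersion;
		if (endVersion != rhs.endVersion) return endVersion < rhs.endVersion;
		return tagId < rhs.tagId;
	}
	// Treat files covering the same version range and tag as duplicates.
	bool sameContent(const LogFileStub& rhs) const {
		return beginVersion == rhs.beginVersion && endVersion == rhs.endVersion && tagId == rhs.tagId;
	}
};

// Keep files overlapping [begin, end], sort them, then drop adjacent
// duplicates such as those produced by retried epochs.
std::vector<LogFileStub> relevantLogFiles(const std::vector<LogFileStub>& files, Version begin, Version end) {
	std::vector<LogFileStub> filtered;
	for (const auto& file : files) {
		if (file.beginVersion <= end && file.endVersion >= begin) filtered.push_back(file);
	}
	std::sort(filtered.begin(), filtered.end());

	std::vector<LogFileStub> deduped;
	for (const auto& file : filtered) {
		if (deduped.empty() || !deduped.back().sameContent(file)) deduped.push_back(file);
	}
	return deduped;
}

int main() {
	std::vector<LogFileStub> files = {
		{ 100, 200, 0 }, { 100, 200, 0 }, // duplicate from a retried epoch
		{ 200, 300, 0 }, { 900, 950, 0 }  // last file falls outside the requested range
	};
	for (const auto& f : relevantLogFiles(files, 0, 400))
		std::cout << f.beginVersion << "-" << f.endVersion << "\n"; // prints 100-200, then 200-300
	return 0;
}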