Fix duplicate file removal for subset version ranges
Partitioned logs can have version ranges that are strict subsets of one another, which were not properly handled: we previously assumed that overlap only happens between files with the same begin version.
This commit is contained in:
parent
1a1f572f29
commit
4c75c61f39
|
@ -1156,11 +1156,13 @@ public:
|
|||
// If a log file's progress is not saved, a new log file will be generated
|
||||
// with the same begin version. So we can have a file that contains a subset
|
||||
// of contents in another log file.
|
||||
// PRE-CONDITION: logs are already sorted.
|
||||
// PRE-CONDITION: logs are already sorted by (tagId, beginVersion, endVersion).
|
||||
static std::vector<LogFile> filterDuplicates(const std::vector<LogFile>& logs) {
|
||||
std::vector<LogFile> filtered;
|
||||
int i = 0;
|
||||
for (int j = 1; j < logs.size(); j++) {
|
||||
if (logs[j].isSubset(logs[i])) continue;
|
||||
|
||||
if (!logs[i].isSubset(logs[j])) {
|
||||
filtered.push_back(logs[i]);
|
||||
}
|
||||
|
@ -1196,9 +1198,13 @@ public:
|
|||
for (int i = 0; i < logs.size(); i++) {
|
||||
ASSERT(logs[i].tagId >= 0 && logs[i].tagId < logs[i].totalTags);
|
||||
auto& indices = tagIndices[logs[i].tagId];
|
||||
// filter out if indices.back() is subset of files[i]
|
||||
if (!indices.empty() && logs[indices.back()].isSubset(logs[i])) {
|
||||
indices.back() = i;
|
||||
// filter out if indices.back() is subset of logs[i] or vice versa
|
||||
if (!indices.empty()) {
|
||||
if (logs[indices.back()].isSubset(logs[i])) {
|
||||
indices.back() = i;
|
||||
} else if (!logs[i].isSubset(logs[indices.back()])) {
|
||||
indices.push_back(i);
|
||||
}
|
||||
} else {
|
||||
indices.push_back(i);
|
||||
}
|
||||
|
|
|
@ -85,7 +85,7 @@ struct LogFile {
|
|||
// Returns if this log file contains a subset of content of the given file
|
||||
// by comparing version range and tag ID.
|
||||
bool isSubset(const LogFile& rhs) const {
|
||||
return beginVersion == rhs.beginVersion && endVersion <= rhs.endVersion && tagId == rhs.tagId;
|
||||
return beginVersion >= rhs.beginVersion && endVersion <= rhs.endVersion && tagId == rhs.tagId;
|
||||
}
|
||||
|
||||
std::string toString() const {
|
||||
|
|
Loading…
Reference in New Issue